{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.915816640039327, "eval_steps": 500, "global_step": 280000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017556488000140453, "grad_norm": 5.521957336675931, "learning_rate": 3.160112359550562e-07, "loss": 1.0812, "step": 10 }, { "epoch": 0.00035112976000280905, "grad_norm": 5.708572602993713, "learning_rate": 6.671348314606742e-07, "loss": 1.0907, "step": 20 }, { "epoch": 0.0005266946400042135, "grad_norm": 5.375281817925661, "learning_rate": 1.0182584269662922e-06, "loss": 1.0868, "step": 30 }, { "epoch": 0.0007022595200056181, "grad_norm": 5.052791845417721, "learning_rate": 1.3693820224719102e-06, "loss": 1.0727, "step": 40 }, { "epoch": 0.0008778244000070225, "grad_norm": 3.8611207439866524, "learning_rate": 1.7205056179775281e-06, "loss": 1.0479, "step": 50 }, { "epoch": 0.001053389280008427, "grad_norm": 3.6465595676164804, "learning_rate": 2.071629213483146e-06, "loss": 1.0199, "step": 60 }, { "epoch": 0.0012289541600098315, "grad_norm": 1.3902749235806497, "learning_rate": 2.4227528089887643e-06, "loss": 0.9591, "step": 70 }, { "epoch": 0.0014045190400112362, "grad_norm": 1.0776370002359033, "learning_rate": 2.773876404494382e-06, "loss": 0.9458, "step": 80 }, { "epoch": 0.0015800839200126406, "grad_norm": 0.5087540307713369, "learning_rate": 3.125e-06, "loss": 0.9419, "step": 90 }, { "epoch": 0.001755648800014045, "grad_norm": 0.34249186022454586, "learning_rate": 3.4761235955056183e-06, "loss": 0.9196, "step": 100 }, { "epoch": 0.0019312136800154497, "grad_norm": 0.3453262311951503, "learning_rate": 3.827247191011236e-06, "loss": 0.9236, "step": 110 }, { "epoch": 0.002106778560016854, "grad_norm": 0.2403328842081639, "learning_rate": 4.178370786516854e-06, "loss": 0.9158, "step": 120 }, { "epoch": 0.002282343440018259, "grad_norm": 0.1820471162175142, "learning_rate": 4.529494382022472e-06, "loss": 0.9108, "step": 130 }, { "epoch": 0.002457908320019663, "grad_norm": 0.19566371419518408, "learning_rate": 4.88061797752809e-06, "loss": 0.9078, "step": 140 }, { "epoch": 0.0026334732000210677, "grad_norm": 0.14067728487599765, "learning_rate": 5.231741573033709e-06, "loss": 0.9071, "step": 150 }, { "epoch": 0.0028090380800224724, "grad_norm": 0.15601909764113486, "learning_rate": 5.582865168539326e-06, "loss": 0.9036, "step": 160 }, { "epoch": 0.0029846029600238766, "grad_norm": 0.13106316674985408, "learning_rate": 5.933988764044944e-06, "loss": 0.8971, "step": 170 }, { "epoch": 0.0031601678400252813, "grad_norm": 0.23159646369429554, "learning_rate": 6.285112359550562e-06, "loss": 0.9001, "step": 180 }, { "epoch": 0.003335732720026686, "grad_norm": 0.139445412751372, "learning_rate": 6.6362359550561805e-06, "loss": 0.9069, "step": 190 }, { "epoch": 0.00351129760002809, "grad_norm": 0.12232303227649992, "learning_rate": 6.987359550561798e-06, "loss": 0.8973, "step": 200 }, { "epoch": 0.003686862480029495, "grad_norm": 0.09774383208297045, "learning_rate": 7.338483146067417e-06, "loss": 0.8984, "step": 210 }, { "epoch": 0.0038624273600308995, "grad_norm": 0.09872372482967814, "learning_rate": 7.689606741573034e-06, "loss": 0.9006, "step": 220 }, { "epoch": 0.004037992240032304, "grad_norm": 0.09927394772845226, "learning_rate": 8.040730337078651e-06, "loss": 0.8925, "step": 230 }, { "epoch": 0.004213557120033708, "grad_norm": 0.10205270462284845, "learning_rate": 8.391853932584271e-06, "loss": 0.897, "step": 240 }, { "epoch": 0.004389122000035113, "grad_norm": 0.08598854537659635, "learning_rate": 8.742977528089889e-06, "loss": 0.8952, "step": 250 }, { "epoch": 0.004564686880036518, "grad_norm": 0.14052247056430883, "learning_rate": 9.094101123595505e-06, "loss": 0.903, "step": 260 }, { "epoch": 0.004740251760037922, "grad_norm": 0.11633339538747048, "learning_rate": 9.445224719101124e-06, "loss": 0.8993, "step": 270 }, { "epoch": 0.004915816640039326, "grad_norm": 0.15667199732434572, "learning_rate": 9.796348314606742e-06, "loss": 0.8926, "step": 280 }, { "epoch": 0.005091381520040731, "grad_norm": 0.1580863352124487, "learning_rate": 1.0147471910112361e-05, "loss": 0.8906, "step": 290 }, { "epoch": 0.0052669464000421355, "grad_norm": 0.11700961833744775, "learning_rate": 1.0498595505617977e-05, "loss": 0.8911, "step": 300 }, { "epoch": 0.00544251128004354, "grad_norm": 0.1002158140214453, "learning_rate": 1.0849719101123595e-05, "loss": 0.8964, "step": 310 }, { "epoch": 0.005618076160044945, "grad_norm": 0.11450525112897186, "learning_rate": 1.1200842696629214e-05, "loss": 0.8914, "step": 320 }, { "epoch": 0.0057936410400463495, "grad_norm": 0.10355642114335632, "learning_rate": 1.1551966292134832e-05, "loss": 0.8935, "step": 330 }, { "epoch": 0.005969205920047753, "grad_norm": 0.13003862574172384, "learning_rate": 1.190308988764045e-05, "loss": 0.8923, "step": 340 }, { "epoch": 0.006144770800049158, "grad_norm": 0.11942176095930303, "learning_rate": 1.2254213483146068e-05, "loss": 0.8976, "step": 350 }, { "epoch": 0.006320335680050563, "grad_norm": 0.174729880743143, "learning_rate": 1.2605337078651685e-05, "loss": 0.8954, "step": 360 }, { "epoch": 0.006495900560051967, "grad_norm": 0.17454279494445282, "learning_rate": 1.2956460674157303e-05, "loss": 0.8842, "step": 370 }, { "epoch": 0.006671465440053372, "grad_norm": 0.11093942310863414, "learning_rate": 1.3307584269662923e-05, "loss": 0.8938, "step": 380 }, { "epoch": 0.0068470303200547766, "grad_norm": 0.12459459333977055, "learning_rate": 1.365870786516854e-05, "loss": 0.9032, "step": 390 }, { "epoch": 0.00702259520005618, "grad_norm": 0.10478445850007473, "learning_rate": 1.4009831460674158e-05, "loss": 0.8899, "step": 400 }, { "epoch": 0.007198160080057585, "grad_norm": 0.21758319521997785, "learning_rate": 1.4360955056179776e-05, "loss": 0.8911, "step": 410 }, { "epoch": 0.00737372496005899, "grad_norm": 0.3799682257333132, "learning_rate": 1.4712078651685394e-05, "loss": 0.8952, "step": 420 }, { "epoch": 0.007549289840060394, "grad_norm": 0.16613723696367122, "learning_rate": 1.5063202247191011e-05, "loss": 0.8957, "step": 430 }, { "epoch": 0.007724854720061799, "grad_norm": 0.10950583373419281, "learning_rate": 1.541432584269663e-05, "loss": 0.8922, "step": 440 }, { "epoch": 0.007900419600063203, "grad_norm": 0.11813836136178629, "learning_rate": 1.5765449438202247e-05, "loss": 0.8824, "step": 450 }, { "epoch": 0.008075984480064607, "grad_norm": 0.13696044256853676, "learning_rate": 1.6116573033707866e-05, "loss": 0.8916, "step": 460 }, { "epoch": 0.008251549360066012, "grad_norm": 0.2135809828531423, "learning_rate": 1.6467696629213482e-05, "loss": 0.8898, "step": 470 }, { "epoch": 0.008427114240067417, "grad_norm": 0.12168929213785253, "learning_rate": 1.68188202247191e-05, "loss": 0.893, "step": 480 }, { "epoch": 0.008602679120068821, "grad_norm": 0.1845567646344545, "learning_rate": 1.716994382022472e-05, "loss": 0.891, "step": 490 }, { "epoch": 0.008778244000070226, "grad_norm": 0.15251799572779476, "learning_rate": 1.7521067415730337e-05, "loss": 0.8886, "step": 500 }, { "epoch": 0.00895380888007163, "grad_norm": 0.2092028472029786, "learning_rate": 1.7872191011235957e-05, "loss": 0.8878, "step": 510 }, { "epoch": 0.009129373760073035, "grad_norm": 0.11402792829918096, "learning_rate": 1.8223314606741573e-05, "loss": 0.8819, "step": 520 }, { "epoch": 0.00930493864007444, "grad_norm": 0.11373947249157612, "learning_rate": 1.8574438202247192e-05, "loss": 0.8886, "step": 530 }, { "epoch": 0.009480503520075845, "grad_norm": 0.16039738696762704, "learning_rate": 1.892556179775281e-05, "loss": 0.8836, "step": 540 }, { "epoch": 0.00965606840007725, "grad_norm": 0.09881854664635709, "learning_rate": 1.9276685393258428e-05, "loss": 0.8869, "step": 550 }, { "epoch": 0.009831633280078652, "grad_norm": 0.18258558704333444, "learning_rate": 1.9627808988764047e-05, "loss": 0.8813, "step": 560 }, { "epoch": 0.010007198160080057, "grad_norm": 0.1383098131580563, "learning_rate": 1.9978932584269663e-05, "loss": 0.8838, "step": 570 }, { "epoch": 0.010182763040081462, "grad_norm": 0.23244818037178885, "learning_rate": 2.0330056179775282e-05, "loss": 0.8871, "step": 580 }, { "epoch": 0.010358327920082866, "grad_norm": 0.17421140066252747, "learning_rate": 2.0681179775280902e-05, "loss": 0.8878, "step": 590 }, { "epoch": 0.010533892800084271, "grad_norm": 0.12457583224780955, "learning_rate": 2.1032303370786518e-05, "loss": 0.887, "step": 600 }, { "epoch": 0.010709457680085676, "grad_norm": 0.11724554014808711, "learning_rate": 2.1383426966292137e-05, "loss": 0.8806, "step": 610 }, { "epoch": 0.01088502256008708, "grad_norm": 0.1244203189846869, "learning_rate": 2.1734550561797753e-05, "loss": 0.8882, "step": 620 }, { "epoch": 0.011060587440088485, "grad_norm": 0.14846248260345, "learning_rate": 2.208567415730337e-05, "loss": 0.8895, "step": 630 }, { "epoch": 0.01123615232008989, "grad_norm": 0.3889962019808758, "learning_rate": 2.2436797752808992e-05, "loss": 0.8867, "step": 640 }, { "epoch": 0.011411717200091294, "grad_norm": 0.13123791801487897, "learning_rate": 2.2787921348314608e-05, "loss": 0.8865, "step": 650 }, { "epoch": 0.011587282080092699, "grad_norm": 0.11073845463814745, "learning_rate": 2.3139044943820228e-05, "loss": 0.8806, "step": 660 }, { "epoch": 0.011762846960094104, "grad_norm": 0.1578732363955825, "learning_rate": 2.3490168539325844e-05, "loss": 0.8887, "step": 670 }, { "epoch": 0.011938411840095507, "grad_norm": 0.10143291619102777, "learning_rate": 2.384129213483146e-05, "loss": 0.8805, "step": 680 }, { "epoch": 0.012113976720096911, "grad_norm": 0.08763109379315187, "learning_rate": 2.419241573033708e-05, "loss": 0.8851, "step": 690 }, { "epoch": 0.012289541600098316, "grad_norm": 0.08784034286918553, "learning_rate": 2.45435393258427e-05, "loss": 0.8779, "step": 700 }, { "epoch": 0.01246510648009972, "grad_norm": 0.09181187838551577, "learning_rate": 2.4894662921348315e-05, "loss": 0.887, "step": 710 }, { "epoch": 0.012640671360101125, "grad_norm": 0.19649377203734933, "learning_rate": 2.5245786516853938e-05, "loss": 0.8834, "step": 720 }, { "epoch": 0.01281623624010253, "grad_norm": 0.12061678019396534, "learning_rate": 2.559691011235955e-05, "loss": 0.8929, "step": 730 }, { "epoch": 0.012991801120103934, "grad_norm": 0.10118288204090287, "learning_rate": 2.5948033707865173e-05, "loss": 0.8786, "step": 740 }, { "epoch": 0.01316736600010534, "grad_norm": 0.0951775531904648, "learning_rate": 2.6299157303370786e-05, "loss": 0.8823, "step": 750 }, { "epoch": 0.013342930880106744, "grad_norm": 0.14484406391415428, "learning_rate": 2.6650280898876405e-05, "loss": 0.8932, "step": 760 }, { "epoch": 0.013518495760108148, "grad_norm": 0.09667707906081065, "learning_rate": 2.700140449438202e-05, "loss": 0.8853, "step": 770 }, { "epoch": 0.013694060640109553, "grad_norm": 0.09430730124868125, "learning_rate": 2.735252808988764e-05, "loss": 0.8846, "step": 780 }, { "epoch": 0.013869625520110958, "grad_norm": 0.19445492363204217, "learning_rate": 2.770365168539326e-05, "loss": 0.8856, "step": 790 }, { "epoch": 0.01404519040011236, "grad_norm": 0.14603069834354043, "learning_rate": 2.8054775280898876e-05, "loss": 0.8811, "step": 800 }, { "epoch": 0.014220755280113765, "grad_norm": 0.16915073790782253, "learning_rate": 2.8405898876404495e-05, "loss": 0.8896, "step": 810 }, { "epoch": 0.01439632016011517, "grad_norm": 0.19749962008297817, "learning_rate": 2.875702247191011e-05, "loss": 0.8755, "step": 820 }, { "epoch": 0.014571885040116575, "grad_norm": 0.20199712665010328, "learning_rate": 2.910814606741573e-05, "loss": 0.8858, "step": 830 }, { "epoch": 0.01474744992011798, "grad_norm": 0.19151917433720766, "learning_rate": 2.945926966292135e-05, "loss": 0.8867, "step": 840 }, { "epoch": 0.014923014800119384, "grad_norm": 0.15104805977276609, "learning_rate": 2.9810393258426966e-05, "loss": 0.889, "step": 850 }, { "epoch": 0.015098579680120789, "grad_norm": 0.08698921718514607, "learning_rate": 3.0161516853932586e-05, "loss": 0.8833, "step": 860 }, { "epoch": 0.015274144560122193, "grad_norm": 0.21719231893707253, "learning_rate": 3.0512640449438202e-05, "loss": 0.8772, "step": 870 }, { "epoch": 0.015449709440123598, "grad_norm": 0.15292399935888182, "learning_rate": 3.086376404494382e-05, "loss": 0.8864, "step": 880 }, { "epoch": 0.015625274320125, "grad_norm": 0.12868821622479323, "learning_rate": 3.121488764044944e-05, "loss": 0.8808, "step": 890 }, { "epoch": 0.015800839200126406, "grad_norm": 0.1114463273839004, "learning_rate": 3.1566011235955053e-05, "loss": 0.8834, "step": 900 }, { "epoch": 0.01597640408012781, "grad_norm": 0.14646938220861777, "learning_rate": 3.191713483146068e-05, "loss": 0.876, "step": 910 }, { "epoch": 0.016151968960129215, "grad_norm": 0.08950874145362948, "learning_rate": 3.226825842696629e-05, "loss": 0.8834, "step": 920 }, { "epoch": 0.01632753384013062, "grad_norm": 0.09747202190192505, "learning_rate": 3.261938202247191e-05, "loss": 0.8788, "step": 930 }, { "epoch": 0.016503098720132024, "grad_norm": 0.08582650256353923, "learning_rate": 3.297050561797753e-05, "loss": 0.881, "step": 940 }, { "epoch": 0.01667866360013343, "grad_norm": 0.13023059248395036, "learning_rate": 3.3321629213483144e-05, "loss": 0.8792, "step": 950 }, { "epoch": 0.016854228480134834, "grad_norm": 0.10247776876484127, "learning_rate": 3.367275280898877e-05, "loss": 0.8817, "step": 960 }, { "epoch": 0.017029793360136238, "grad_norm": 0.08775525370839382, "learning_rate": 3.402387640449438e-05, "loss": 0.8846, "step": 970 }, { "epoch": 0.017205358240137643, "grad_norm": 0.11476314763996193, "learning_rate": 3.4375e-05, "loss": 0.8725, "step": 980 }, { "epoch": 0.017380923120139048, "grad_norm": 0.09282239496851163, "learning_rate": 3.472612359550562e-05, "loss": 0.8805, "step": 990 }, { "epoch": 0.017556488000140452, "grad_norm": 0.10890505592962989, "learning_rate": 3.5077247191011234e-05, "loss": 0.8842, "step": 1000 }, { "epoch": 0.017732052880141857, "grad_norm": 0.09240520159701895, "learning_rate": 3.5428370786516854e-05, "loss": 0.8832, "step": 1010 }, { "epoch": 0.01790761776014326, "grad_norm": 0.10706185216074135, "learning_rate": 3.577949438202247e-05, "loss": 0.8821, "step": 1020 }, { "epoch": 0.018083182640144666, "grad_norm": 0.15557394691058934, "learning_rate": 3.613061797752809e-05, "loss": 0.8815, "step": 1030 }, { "epoch": 0.01825874752014607, "grad_norm": 0.3040345859600493, "learning_rate": 3.648174157303371e-05, "loss": 0.8858, "step": 1040 }, { "epoch": 0.018434312400147475, "grad_norm": 0.12407916724674464, "learning_rate": 3.6832865168539325e-05, "loss": 0.8771, "step": 1050 }, { "epoch": 0.01860987728014888, "grad_norm": 0.11502050992909092, "learning_rate": 3.7183988764044944e-05, "loss": 0.8803, "step": 1060 }, { "epoch": 0.018785442160150285, "grad_norm": 0.14017187555612576, "learning_rate": 3.7535112359550563e-05, "loss": 0.8769, "step": 1070 }, { "epoch": 0.01896100704015169, "grad_norm": 0.09143403988859881, "learning_rate": 3.788623595505618e-05, "loss": 0.8871, "step": 1080 }, { "epoch": 0.019136571920153094, "grad_norm": 0.11545895191184127, "learning_rate": 3.82373595505618e-05, "loss": 0.8815, "step": 1090 }, { "epoch": 0.0193121368001545, "grad_norm": 0.13316062255351235, "learning_rate": 3.8588483146067415e-05, "loss": 0.8738, "step": 1100 }, { "epoch": 0.0194877016801559, "grad_norm": 0.08756710132417034, "learning_rate": 3.8939606741573034e-05, "loss": 0.8799, "step": 1110 }, { "epoch": 0.019663266560157305, "grad_norm": 0.10678008159217663, "learning_rate": 3.9290730337078654e-05, "loss": 0.8767, "step": 1120 }, { "epoch": 0.01983883144015871, "grad_norm": 0.11334060600352037, "learning_rate": 3.964185393258427e-05, "loss": 0.882, "step": 1130 }, { "epoch": 0.020014396320160114, "grad_norm": 0.1160410339823135, "learning_rate": 3.999297752808989e-05, "loss": 0.8816, "step": 1140 }, { "epoch": 0.02018996120016152, "grad_norm": 0.12459030880076682, "learning_rate": 4.0344101123595505e-05, "loss": 0.8795, "step": 1150 }, { "epoch": 0.020365526080162923, "grad_norm": 0.12559299403358756, "learning_rate": 4.0695224719101125e-05, "loss": 0.8782, "step": 1160 }, { "epoch": 0.020541090960164328, "grad_norm": 0.10023943972764061, "learning_rate": 4.1046348314606744e-05, "loss": 0.8791, "step": 1170 }, { "epoch": 0.020716655840165733, "grad_norm": 0.10316844757941478, "learning_rate": 4.1397471910112364e-05, "loss": 0.8823, "step": 1180 }, { "epoch": 0.020892220720167137, "grad_norm": 0.14926091459568772, "learning_rate": 4.174859550561798e-05, "loss": 0.8907, "step": 1190 }, { "epoch": 0.021067785600168542, "grad_norm": 0.08984367645265927, "learning_rate": 4.2099719101123596e-05, "loss": 0.881, "step": 1200 }, { "epoch": 0.021243350480169947, "grad_norm": 0.17623413875591554, "learning_rate": 4.2450842696629215e-05, "loss": 0.8739, "step": 1210 }, { "epoch": 0.02141891536017135, "grad_norm": 0.16791972225000076, "learning_rate": 4.2801966292134835e-05, "loss": 0.8794, "step": 1220 }, { "epoch": 0.021594480240172756, "grad_norm": 0.10879541248589036, "learning_rate": 4.3153089887640454e-05, "loss": 0.8802, "step": 1230 }, { "epoch": 0.02177004512017416, "grad_norm": 0.11338165637195254, "learning_rate": 4.350421348314607e-05, "loss": 0.8805, "step": 1240 }, { "epoch": 0.021945610000175565, "grad_norm": 0.12179767113807875, "learning_rate": 4.3855337078651686e-05, "loss": 0.8762, "step": 1250 }, { "epoch": 0.02212117488017697, "grad_norm": 0.09004762223463969, "learning_rate": 4.4206460674157305e-05, "loss": 0.8807, "step": 1260 }, { "epoch": 0.022296739760178375, "grad_norm": 0.11161869071576214, "learning_rate": 4.4557584269662925e-05, "loss": 0.8747, "step": 1270 }, { "epoch": 0.02247230464017978, "grad_norm": 0.1320558927752511, "learning_rate": 4.4908707865168544e-05, "loss": 0.8751, "step": 1280 }, { "epoch": 0.022647869520181184, "grad_norm": 0.10185680177260303, "learning_rate": 4.525983146067416e-05, "loss": 0.8717, "step": 1290 }, { "epoch": 0.02282343440018259, "grad_norm": 0.14466921189363552, "learning_rate": 4.5610955056179776e-05, "loss": 0.8801, "step": 1300 }, { "epoch": 0.022998999280183993, "grad_norm": 0.1622565537783581, "learning_rate": 4.5962078651685396e-05, "loss": 0.883, "step": 1310 }, { "epoch": 0.023174564160185398, "grad_norm": 0.12906234720991022, "learning_rate": 4.6313202247191015e-05, "loss": 0.884, "step": 1320 }, { "epoch": 0.023350129040186803, "grad_norm": 0.0823508745676725, "learning_rate": 4.6664325842696635e-05, "loss": 0.8718, "step": 1330 }, { "epoch": 0.023525693920188207, "grad_norm": 0.06900689900788833, "learning_rate": 4.701544943820225e-05, "loss": 0.8714, "step": 1340 }, { "epoch": 0.02370125880018961, "grad_norm": 0.10270879313362849, "learning_rate": 4.736657303370787e-05, "loss": 0.8774, "step": 1350 }, { "epoch": 0.023876823680191013, "grad_norm": 0.10273776368395167, "learning_rate": 4.7717696629213486e-05, "loss": 0.8826, "step": 1360 }, { "epoch": 0.024052388560192418, "grad_norm": 0.09596988299799603, "learning_rate": 4.80688202247191e-05, "loss": 0.8798, "step": 1370 }, { "epoch": 0.024227953440193822, "grad_norm": 0.11847816310379901, "learning_rate": 4.8419943820224725e-05, "loss": 0.8729, "step": 1380 }, { "epoch": 0.024403518320195227, "grad_norm": 0.10770913945601095, "learning_rate": 4.877106741573034e-05, "loss": 0.8725, "step": 1390 }, { "epoch": 0.02457908320019663, "grad_norm": 0.09760188151512993, "learning_rate": 4.912219101123596e-05, "loss": 0.8782, "step": 1400 }, { "epoch": 0.024754648080198036, "grad_norm": 0.12070470238690403, "learning_rate": 4.947331460674158e-05, "loss": 0.8765, "step": 1410 }, { "epoch": 0.02493021296019944, "grad_norm": 0.08369159988123433, "learning_rate": 4.982443820224719e-05, "loss": 0.873, "step": 1420 }, { "epoch": 0.025105777840200846, "grad_norm": 0.11018167891867899, "learning_rate": 5.0175561797752815e-05, "loss": 0.8772, "step": 1430 }, { "epoch": 0.02528134272020225, "grad_norm": 0.11718188814986187, "learning_rate": 5.0526685393258435e-05, "loss": 0.8794, "step": 1440 }, { "epoch": 0.025456907600203655, "grad_norm": 0.12314955573510664, "learning_rate": 5.087780898876404e-05, "loss": 0.8754, "step": 1450 }, { "epoch": 0.02563247248020506, "grad_norm": 0.12931857851416387, "learning_rate": 5.122893258426966e-05, "loss": 0.8775, "step": 1460 }, { "epoch": 0.025808037360206464, "grad_norm": 0.07576528257708266, "learning_rate": 5.158005617977528e-05, "loss": 0.8762, "step": 1470 }, { "epoch": 0.02598360224020787, "grad_norm": 0.10621841321010858, "learning_rate": 5.1931179775280906e-05, "loss": 0.8832, "step": 1480 }, { "epoch": 0.026159167120209274, "grad_norm": 0.11818241117123471, "learning_rate": 5.2282303370786525e-05, "loss": 0.8801, "step": 1490 }, { "epoch": 0.02633473200021068, "grad_norm": 0.09873292829166555, "learning_rate": 5.263342696629213e-05, "loss": 0.8743, "step": 1500 }, { "epoch": 0.026510296880212083, "grad_norm": 0.1140487973108858, "learning_rate": 5.298455056179775e-05, "loss": 0.878, "step": 1510 }, { "epoch": 0.026685861760213488, "grad_norm": 0.08472200669063173, "learning_rate": 5.333567415730337e-05, "loss": 0.8773, "step": 1520 }, { "epoch": 0.026861426640214892, "grad_norm": 0.108810534675764, "learning_rate": 5.368679775280899e-05, "loss": 0.8721, "step": 1530 }, { "epoch": 0.027036991520216297, "grad_norm": 0.09625556890133662, "learning_rate": 5.4037921348314616e-05, "loss": 0.8738, "step": 1540 }, { "epoch": 0.0272125564002177, "grad_norm": 0.07747487756307622, "learning_rate": 5.438904494382022e-05, "loss": 0.8717, "step": 1550 }, { "epoch": 0.027388121280219106, "grad_norm": 0.10360135602517416, "learning_rate": 5.474016853932584e-05, "loss": 0.8707, "step": 1560 }, { "epoch": 0.02756368616022051, "grad_norm": 0.09995946461117297, "learning_rate": 5.509129213483146e-05, "loss": 0.8715, "step": 1570 }, { "epoch": 0.027739251040221916, "grad_norm": 0.1213715210921303, "learning_rate": 5.544241573033708e-05, "loss": 0.874, "step": 1580 }, { "epoch": 0.02791481592022332, "grad_norm": 0.13437138004106108, "learning_rate": 5.5793539325842706e-05, "loss": 0.8792, "step": 1590 }, { "epoch": 0.02809038080022472, "grad_norm": 0.11119987780625132, "learning_rate": 5.614466292134831e-05, "loss": 0.8759, "step": 1600 }, { "epoch": 0.028265945680226126, "grad_norm": 0.1465789182089292, "learning_rate": 5.649578651685393e-05, "loss": 0.8739, "step": 1610 }, { "epoch": 0.02844151056022753, "grad_norm": 0.12862822684017766, "learning_rate": 5.684691011235955e-05, "loss": 0.8765, "step": 1620 }, { "epoch": 0.028617075440228935, "grad_norm": 0.13542549581629815, "learning_rate": 5.719803370786517e-05, "loss": 0.8761, "step": 1630 }, { "epoch": 0.02879264032023034, "grad_norm": 0.11928026764126487, "learning_rate": 5.7549157303370796e-05, "loss": 0.8784, "step": 1640 }, { "epoch": 0.028968205200231745, "grad_norm": 0.0763259314761483, "learning_rate": 5.79002808988764e-05, "loss": 0.881, "step": 1650 }, { "epoch": 0.02914377008023315, "grad_norm": 0.07782812695027325, "learning_rate": 5.825140449438202e-05, "loss": 0.8739, "step": 1660 }, { "epoch": 0.029319334960234554, "grad_norm": 0.13227242239160308, "learning_rate": 5.860252808988764e-05, "loss": 0.8755, "step": 1670 }, { "epoch": 0.02949489984023596, "grad_norm": 0.09658700802290138, "learning_rate": 5.895365168539326e-05, "loss": 0.8747, "step": 1680 }, { "epoch": 0.029670464720237363, "grad_norm": 0.12127904263227096, "learning_rate": 5.930477528089888e-05, "loss": 0.8669, "step": 1690 }, { "epoch": 0.029846029600238768, "grad_norm": 0.15261930954246689, "learning_rate": 5.965589887640449e-05, "loss": 0.8819, "step": 1700 }, { "epoch": 0.030021594480240173, "grad_norm": 0.09311605073275837, "learning_rate": 6.000702247191011e-05, "loss": 0.8753, "step": 1710 }, { "epoch": 0.030197159360241577, "grad_norm": 0.1603394526918929, "learning_rate": 6.035814606741573e-05, "loss": 0.8677, "step": 1720 }, { "epoch": 0.030372724240242982, "grad_norm": 0.10052514144643267, "learning_rate": 6.070926966292135e-05, "loss": 0.8769, "step": 1730 }, { "epoch": 0.030548289120244387, "grad_norm": 0.1114893564591888, "learning_rate": 6.106039325842698e-05, "loss": 0.8754, "step": 1740 }, { "epoch": 0.03072385400024579, "grad_norm": 0.07851976586212815, "learning_rate": 6.141151685393258e-05, "loss": 0.8779, "step": 1750 }, { "epoch": 0.030899418880247196, "grad_norm": 0.09385890866527777, "learning_rate": 6.17626404494382e-05, "loss": 0.8724, "step": 1760 }, { "epoch": 0.0310749837602486, "grad_norm": 0.07885972168006505, "learning_rate": 6.211376404494383e-05, "loss": 0.8804, "step": 1770 }, { "epoch": 0.03125054864025, "grad_norm": 0.07208905134588864, "learning_rate": 6.246488764044944e-05, "loss": 0.8699, "step": 1780 }, { "epoch": 0.03142611352025141, "grad_norm": 0.08292397779923719, "learning_rate": 6.281601123595507e-05, "loss": 0.8778, "step": 1790 }, { "epoch": 0.03160167840025281, "grad_norm": 0.08427477325386275, "learning_rate": 6.316713483146067e-05, "loss": 0.8805, "step": 1800 }, { "epoch": 0.03177724328025422, "grad_norm": 0.12272744268665463, "learning_rate": 6.351825842696629e-05, "loss": 0.8688, "step": 1810 }, { "epoch": 0.03195280816025562, "grad_norm": 0.07212266832330874, "learning_rate": 6.386938202247192e-05, "loss": 0.8792, "step": 1820 }, { "epoch": 0.03212837304025703, "grad_norm": 0.09181690769470355, "learning_rate": 6.422050561797753e-05, "loss": 0.8725, "step": 1830 }, { "epoch": 0.03230393792025843, "grad_norm": 0.08977185572727775, "learning_rate": 6.457162921348316e-05, "loss": 0.8692, "step": 1840 }, { "epoch": 0.03247950280025984, "grad_norm": 0.0930865006899593, "learning_rate": 6.492275280898876e-05, "loss": 0.8722, "step": 1850 }, { "epoch": 0.03265506768026124, "grad_norm": 0.0960602121051624, "learning_rate": 6.527387640449438e-05, "loss": 0.8731, "step": 1860 }, { "epoch": 0.03283063256026265, "grad_norm": 0.08424816990217762, "learning_rate": 6.562500000000001e-05, "loss": 0.8775, "step": 1870 }, { "epoch": 0.03300619744026405, "grad_norm": 0.1428367714092643, "learning_rate": 6.597612359550562e-05, "loss": 0.8723, "step": 1880 }, { "epoch": 0.03318176232026546, "grad_norm": 0.09339724033251974, "learning_rate": 6.632724719101125e-05, "loss": 0.8816, "step": 1890 }, { "epoch": 0.03335732720026686, "grad_norm": 0.1149360897725946, "learning_rate": 6.667837078651685e-05, "loss": 0.8671, "step": 1900 }, { "epoch": 0.033532892080268266, "grad_norm": 0.07463221208225414, "learning_rate": 6.702949438202247e-05, "loss": 0.8744, "step": 1910 }, { "epoch": 0.03370845696026967, "grad_norm": 0.09317303782517729, "learning_rate": 6.73806179775281e-05, "loss": 0.8688, "step": 1920 }, { "epoch": 0.033884021840271075, "grad_norm": 0.0827646211262487, "learning_rate": 6.773174157303371e-05, "loss": 0.867, "step": 1930 }, { "epoch": 0.034059586720272476, "grad_norm": 0.11607234004724722, "learning_rate": 6.808286516853934e-05, "loss": 0.8789, "step": 1940 }, { "epoch": 0.034235151600273885, "grad_norm": 0.08829585851860171, "learning_rate": 6.843398876404494e-05, "loss": 0.8668, "step": 1950 }, { "epoch": 0.034410716480275286, "grad_norm": 0.08630338550914773, "learning_rate": 6.878511235955056e-05, "loss": 0.877, "step": 1960 }, { "epoch": 0.03458628136027669, "grad_norm": 0.1163233532454095, "learning_rate": 6.913623595505618e-05, "loss": 0.8714, "step": 1970 }, { "epoch": 0.034761846240278095, "grad_norm": 0.12675596910350276, "learning_rate": 6.94873595505618e-05, "loss": 0.8774, "step": 1980 }, { "epoch": 0.034937411120279496, "grad_norm": 0.09444714022240416, "learning_rate": 6.983848314606743e-05, "loss": 0.8744, "step": 1990 }, { "epoch": 0.035112976000280904, "grad_norm": 0.08960441180721453, "learning_rate": 7.018960674157303e-05, "loss": 0.8808, "step": 2000 }, { "epoch": 0.035288540880282306, "grad_norm": 0.0998993091001679, "learning_rate": 7.054073033707865e-05, "loss": 0.8781, "step": 2010 }, { "epoch": 0.035464105760283714, "grad_norm": 0.11235014703105918, "learning_rate": 7.089185393258427e-05, "loss": 0.8783, "step": 2020 }, { "epoch": 0.035639670640285115, "grad_norm": 0.08286826522891387, "learning_rate": 7.12429775280899e-05, "loss": 0.8769, "step": 2030 }, { "epoch": 0.03581523552028652, "grad_norm": 0.10936917717514412, "learning_rate": 7.159410112359552e-05, "loss": 0.8748, "step": 2040 }, { "epoch": 0.035990800400287924, "grad_norm": 0.12021495932110862, "learning_rate": 7.194522471910112e-05, "loss": 0.8688, "step": 2050 }, { "epoch": 0.03616636528028933, "grad_norm": 0.0866737134060902, "learning_rate": 7.229634831460674e-05, "loss": 0.8705, "step": 2060 }, { "epoch": 0.036341930160290734, "grad_norm": 0.09024807003894653, "learning_rate": 7.264747191011236e-05, "loss": 0.8786, "step": 2070 }, { "epoch": 0.03651749504029214, "grad_norm": 0.11950063547577987, "learning_rate": 7.299859550561798e-05, "loss": 0.8701, "step": 2080 }, { "epoch": 0.03669305992029354, "grad_norm": 0.1302159495426378, "learning_rate": 7.334971910112361e-05, "loss": 0.8702, "step": 2090 }, { "epoch": 0.03686862480029495, "grad_norm": 0.1106051898767845, "learning_rate": 7.370084269662921e-05, "loss": 0.8731, "step": 2100 }, { "epoch": 0.03704418968029635, "grad_norm": 0.12559592446553916, "learning_rate": 7.405196629213484e-05, "loss": 0.875, "step": 2110 }, { "epoch": 0.03721975456029776, "grad_norm": 0.0744267921977051, "learning_rate": 7.440308988764045e-05, "loss": 0.8666, "step": 2120 }, { "epoch": 0.03739531944029916, "grad_norm": 0.08962407126469685, "learning_rate": 7.475421348314607e-05, "loss": 0.8716, "step": 2130 }, { "epoch": 0.03757088432030057, "grad_norm": 0.09835657623995289, "learning_rate": 7.51053370786517e-05, "loss": 0.8701, "step": 2140 }, { "epoch": 0.03774644920030197, "grad_norm": 0.07510032325589777, "learning_rate": 7.54564606741573e-05, "loss": 0.8651, "step": 2150 }, { "epoch": 0.03792201408030338, "grad_norm": 0.1279588603520965, "learning_rate": 7.580758426966293e-05, "loss": 0.8747, "step": 2160 }, { "epoch": 0.03809757896030478, "grad_norm": 0.0836110324331831, "learning_rate": 7.615870786516854e-05, "loss": 0.8704, "step": 2170 }, { "epoch": 0.03827314384030619, "grad_norm": 0.07018143541902473, "learning_rate": 7.650983146067416e-05, "loss": 0.8827, "step": 2180 }, { "epoch": 0.03844870872030759, "grad_norm": 0.08837340260940743, "learning_rate": 7.686095505617979e-05, "loss": 0.8718, "step": 2190 }, { "epoch": 0.038624273600309, "grad_norm": 0.08585956793197065, "learning_rate": 7.721207865168539e-05, "loss": 0.875, "step": 2200 }, { "epoch": 0.0387998384803104, "grad_norm": 0.09076681816308635, "learning_rate": 7.756320224719102e-05, "loss": 0.8745, "step": 2210 }, { "epoch": 0.0389754033603118, "grad_norm": 0.0900400659383941, "learning_rate": 7.791432584269663e-05, "loss": 0.8734, "step": 2220 }, { "epoch": 0.03915096824031321, "grad_norm": 0.11490939261904487, "learning_rate": 7.826544943820225e-05, "loss": 0.8773, "step": 2230 }, { "epoch": 0.03932653312031461, "grad_norm": 0.09547733989058134, "learning_rate": 7.861657303370788e-05, "loss": 0.871, "step": 2240 }, { "epoch": 0.03950209800031602, "grad_norm": 0.09366012973645699, "learning_rate": 7.896769662921348e-05, "loss": 0.8789, "step": 2250 }, { "epoch": 0.03967766288031742, "grad_norm": 0.13061623207691203, "learning_rate": 7.93188202247191e-05, "loss": 0.8701, "step": 2260 }, { "epoch": 0.03985322776031883, "grad_norm": 0.09396449884425516, "learning_rate": 7.966994382022472e-05, "loss": 0.8674, "step": 2270 }, { "epoch": 0.04002879264032023, "grad_norm": 0.081202074367791, "learning_rate": 8.002106741573035e-05, "loss": 0.8708, "step": 2280 }, { "epoch": 0.040204357520321636, "grad_norm": 0.08152083969532947, "learning_rate": 8.037219101123596e-05, "loss": 0.8751, "step": 2290 }, { "epoch": 0.04037992240032304, "grad_norm": 0.10445825729166426, "learning_rate": 8.072331460674157e-05, "loss": 0.8746, "step": 2300 }, { "epoch": 0.040555487280324445, "grad_norm": 0.13780276532848207, "learning_rate": 8.10744382022472e-05, "loss": 0.8714, "step": 2310 }, { "epoch": 0.04073105216032585, "grad_norm": 0.07835817898931924, "learning_rate": 8.142556179775281e-05, "loss": 0.8736, "step": 2320 }, { "epoch": 0.040906617040327255, "grad_norm": 0.07331873240648742, "learning_rate": 8.177668539325844e-05, "loss": 0.8731, "step": 2330 }, { "epoch": 0.041082181920328656, "grad_norm": 0.11721648321005346, "learning_rate": 8.212780898876405e-05, "loss": 0.8733, "step": 2340 }, { "epoch": 0.041257746800330064, "grad_norm": 0.1719497813435053, "learning_rate": 8.247893258426966e-05, "loss": 0.8791, "step": 2350 }, { "epoch": 0.041433311680331465, "grad_norm": 0.08148721441455263, "learning_rate": 8.283005617977529e-05, "loss": 0.8723, "step": 2360 }, { "epoch": 0.04160887656033287, "grad_norm": 0.10026605180058305, "learning_rate": 8.31811797752809e-05, "loss": 0.8748, "step": 2370 }, { "epoch": 0.041784441440334275, "grad_norm": 0.11754014286672843, "learning_rate": 8.353230337078653e-05, "loss": 0.8782, "step": 2380 }, { "epoch": 0.04196000632033568, "grad_norm": 0.10731994918529643, "learning_rate": 8.388342696629214e-05, "loss": 0.8802, "step": 2390 }, { "epoch": 0.042135571200337084, "grad_norm": 0.10859563034401212, "learning_rate": 8.423455056179775e-05, "loss": 0.8689, "step": 2400 }, { "epoch": 0.04231113608033849, "grad_norm": 0.09597930546324353, "learning_rate": 8.458567415730338e-05, "loss": 0.8696, "step": 2410 }, { "epoch": 0.04248670096033989, "grad_norm": 0.10547186388674545, "learning_rate": 8.493679775280899e-05, "loss": 0.8787, "step": 2420 }, { "epoch": 0.0426622658403413, "grad_norm": 0.09348780419089932, "learning_rate": 8.528792134831462e-05, "loss": 0.8692, "step": 2430 }, { "epoch": 0.0428378307203427, "grad_norm": 0.08611926721115874, "learning_rate": 8.563904494382023e-05, "loss": 0.8722, "step": 2440 }, { "epoch": 0.043013395600344104, "grad_norm": 0.11653422929769607, "learning_rate": 8.599016853932584e-05, "loss": 0.8776, "step": 2450 }, { "epoch": 0.04318896048034551, "grad_norm": 0.1111063126732818, "learning_rate": 8.634129213483147e-05, "loss": 0.8705, "step": 2460 }, { "epoch": 0.04336452536034691, "grad_norm": 0.14743627127639225, "learning_rate": 8.669241573033708e-05, "loss": 0.8715, "step": 2470 }, { "epoch": 0.04354009024034832, "grad_norm": 0.1007333490386571, "learning_rate": 8.70435393258427e-05, "loss": 0.8733, "step": 2480 }, { "epoch": 0.04371565512034972, "grad_norm": 0.07123765213968385, "learning_rate": 8.739466292134832e-05, "loss": 0.8701, "step": 2490 }, { "epoch": 0.04389122000035113, "grad_norm": 0.09039947304214216, "learning_rate": 8.774578651685393e-05, "loss": 0.8852, "step": 2500 }, { "epoch": 0.04406678488035253, "grad_norm": 0.0877478104769651, "learning_rate": 8.809691011235956e-05, "loss": 0.8696, "step": 2510 }, { "epoch": 0.04424234976035394, "grad_norm": 0.07930286645977104, "learning_rate": 8.844803370786517e-05, "loss": 0.8661, "step": 2520 }, { "epoch": 0.04441791464035534, "grad_norm": 0.15239307871870406, "learning_rate": 8.87991573033708e-05, "loss": 0.8696, "step": 2530 }, { "epoch": 0.04459347952035675, "grad_norm": 0.0661294853551783, "learning_rate": 8.915028089887641e-05, "loss": 0.8713, "step": 2540 }, { "epoch": 0.04476904440035815, "grad_norm": 0.07691426284051446, "learning_rate": 8.950140449438202e-05, "loss": 0.871, "step": 2550 }, { "epoch": 0.04494460928035956, "grad_norm": 0.09285817049029198, "learning_rate": 8.985252808988765e-05, "loss": 0.8618, "step": 2560 }, { "epoch": 0.04512017416036096, "grad_norm": 0.1445573685383954, "learning_rate": 9.020365168539326e-05, "loss": 0.8722, "step": 2570 }, { "epoch": 0.04529573904036237, "grad_norm": 0.09214466523692007, "learning_rate": 9.055477528089889e-05, "loss": 0.8641, "step": 2580 }, { "epoch": 0.04547130392036377, "grad_norm": 0.08881843255206193, "learning_rate": 9.09058988764045e-05, "loss": 0.8712, "step": 2590 }, { "epoch": 0.04564686880036518, "grad_norm": 0.09426639476722427, "learning_rate": 9.125702247191011e-05, "loss": 0.8687, "step": 2600 }, { "epoch": 0.04582243368036658, "grad_norm": 0.08790925243557784, "learning_rate": 9.160814606741574e-05, "loss": 0.8703, "step": 2610 }, { "epoch": 0.045997998560367986, "grad_norm": 0.1344042728083833, "learning_rate": 9.195926966292135e-05, "loss": 0.8747, "step": 2620 }, { "epoch": 0.04617356344036939, "grad_norm": 0.0819882931403838, "learning_rate": 9.231039325842698e-05, "loss": 0.869, "step": 2630 }, { "epoch": 0.046349128320370796, "grad_norm": 0.07695996431276211, "learning_rate": 9.266151685393259e-05, "loss": 0.8763, "step": 2640 }, { "epoch": 0.0465246932003722, "grad_norm": 0.12103296580832704, "learning_rate": 9.30126404494382e-05, "loss": 0.8759, "step": 2650 }, { "epoch": 0.046700258080373605, "grad_norm": 0.10053373488238627, "learning_rate": 9.336376404494383e-05, "loss": 0.8628, "step": 2660 }, { "epoch": 0.046875822960375006, "grad_norm": 0.09363301253517695, "learning_rate": 9.371488764044944e-05, "loss": 0.8813, "step": 2670 }, { "epoch": 0.047051387840376414, "grad_norm": 0.10884568097127366, "learning_rate": 9.406601123595507e-05, "loss": 0.8715, "step": 2680 }, { "epoch": 0.047226952720377816, "grad_norm": 0.09105824767723958, "learning_rate": 9.441713483146067e-05, "loss": 0.868, "step": 2690 }, { "epoch": 0.04740251760037922, "grad_norm": 0.07610597950464966, "learning_rate": 9.47682584269663e-05, "loss": 0.873, "step": 2700 }, { "epoch": 0.047578082480380625, "grad_norm": 0.10464935004067749, "learning_rate": 9.511938202247192e-05, "loss": 0.8716, "step": 2710 }, { "epoch": 0.047753647360382026, "grad_norm": 0.06946899076922435, "learning_rate": 9.547050561797753e-05, "loss": 0.8684, "step": 2720 }, { "epoch": 0.047929212240383434, "grad_norm": 0.08061928733635612, "learning_rate": 9.582162921348316e-05, "loss": 0.8782, "step": 2730 }, { "epoch": 0.048104777120384835, "grad_norm": 0.11139900903899783, "learning_rate": 9.617275280898876e-05, "loss": 0.8867, "step": 2740 }, { "epoch": 0.048280342000386244, "grad_norm": 0.10383002385475375, "learning_rate": 9.652387640449438e-05, "loss": 0.8675, "step": 2750 }, { "epoch": 0.048455906880387645, "grad_norm": 0.0742451160080383, "learning_rate": 9.687500000000001e-05, "loss": 0.8719, "step": 2760 }, { "epoch": 0.04863147176038905, "grad_norm": 0.08339560204858726, "learning_rate": 9.722612359550562e-05, "loss": 0.874, "step": 2770 }, { "epoch": 0.048807036640390454, "grad_norm": 0.0886617007520003, "learning_rate": 9.757724719101125e-05, "loss": 0.8758, "step": 2780 }, { "epoch": 0.04898260152039186, "grad_norm": 0.07014192220796461, "learning_rate": 9.792837078651685e-05, "loss": 0.8703, "step": 2790 }, { "epoch": 0.04915816640039326, "grad_norm": 0.10642527077501172, "learning_rate": 9.827949438202247e-05, "loss": 0.8716, "step": 2800 }, { "epoch": 0.04933373128039467, "grad_norm": 0.12543116445892447, "learning_rate": 9.86306179775281e-05, "loss": 0.8704, "step": 2810 }, { "epoch": 0.04950929616039607, "grad_norm": 0.09696623122489874, "learning_rate": 9.898174157303371e-05, "loss": 0.8686, "step": 2820 }, { "epoch": 0.04968486104039748, "grad_norm": 0.08364880955280966, "learning_rate": 9.933286516853934e-05, "loss": 0.8677, "step": 2830 }, { "epoch": 0.04986042592039888, "grad_norm": 0.07627651929749238, "learning_rate": 9.968398876404494e-05, "loss": 0.8654, "step": 2840 }, { "epoch": 0.05003599080040029, "grad_norm": 0.07662343368914502, "learning_rate": 0.00010003511235955056, "loss": 0.8706, "step": 2850 }, { "epoch": 0.05021155568040169, "grad_norm": 0.09355597457773152, "learning_rate": 0.00010038623595505619, "loss": 0.8746, "step": 2860 }, { "epoch": 0.0503871205604031, "grad_norm": 0.14354171802669177, "learning_rate": 0.0001007373595505618, "loss": 0.8686, "step": 2870 }, { "epoch": 0.0505626854404045, "grad_norm": 0.08260488112384638, "learning_rate": 0.00010108848314606743, "loss": 0.865, "step": 2880 }, { "epoch": 0.05073825032040591, "grad_norm": 0.12336890236691185, "learning_rate": 0.00010143960674157304, "loss": 0.8682, "step": 2890 }, { "epoch": 0.05091381520040731, "grad_norm": 0.09619293620987679, "learning_rate": 0.00010179073033707867, "loss": 0.8657, "step": 2900 }, { "epoch": 0.05108938008040872, "grad_norm": 0.09299525913420573, "learning_rate": 0.00010214185393258427, "loss": 0.8702, "step": 2910 }, { "epoch": 0.05126494496041012, "grad_norm": 0.10287830534293611, "learning_rate": 0.00010249297752808988, "loss": 0.868, "step": 2920 }, { "epoch": 0.05144050984041153, "grad_norm": 0.06290306939053397, "learning_rate": 0.0001028441011235955, "loss": 0.8528, "step": 2930 }, { "epoch": 0.05161607472041293, "grad_norm": 0.08896019860214284, "learning_rate": 0.00010319522471910112, "loss": 0.8734, "step": 2940 }, { "epoch": 0.05179163960041433, "grad_norm": 0.08591212361928517, "learning_rate": 0.00010354634831460675, "loss": 0.8659, "step": 2950 }, { "epoch": 0.05196720448041574, "grad_norm": 0.10448251442617819, "learning_rate": 0.00010389747191011237, "loss": 0.8653, "step": 2960 }, { "epoch": 0.05214276936041714, "grad_norm": 0.0808799666528056, "learning_rate": 0.00010424859550561798, "loss": 0.8733, "step": 2970 }, { "epoch": 0.05231833424041855, "grad_norm": 0.0940345095307922, "learning_rate": 0.00010459971910112361, "loss": 0.8648, "step": 2980 }, { "epoch": 0.05249389912041995, "grad_norm": 0.07052264219731792, "learning_rate": 0.00010495084269662922, "loss": 0.8725, "step": 2990 }, { "epoch": 0.05266946400042136, "grad_norm": 0.07885099466722859, "learning_rate": 0.00010530196629213485, "loss": 0.881, "step": 3000 }, { "epoch": 0.05284502888042276, "grad_norm": 0.0754634601812287, "learning_rate": 0.00010565308988764045, "loss": 0.873, "step": 3010 }, { "epoch": 0.053020593760424166, "grad_norm": 0.06814422978053401, "learning_rate": 0.00010600421348314606, "loss": 0.8665, "step": 3020 }, { "epoch": 0.05319615864042557, "grad_norm": 0.07786495136330572, "learning_rate": 0.00010635533707865169, "loss": 0.8673, "step": 3030 }, { "epoch": 0.053371723520426975, "grad_norm": 0.10207021620693676, "learning_rate": 0.0001067064606741573, "loss": 0.8735, "step": 3040 }, { "epoch": 0.053547288400428376, "grad_norm": 0.08564142990231689, "learning_rate": 0.00010705758426966293, "loss": 0.8689, "step": 3050 }, { "epoch": 0.053722853280429785, "grad_norm": 0.08114109637591382, "learning_rate": 0.00010740870786516854, "loss": 0.8679, "step": 3060 }, { "epoch": 0.053898418160431186, "grad_norm": 0.09546023065439535, "learning_rate": 0.00010775983146067417, "loss": 0.8693, "step": 3070 }, { "epoch": 0.054073983040432594, "grad_norm": 0.10860912735636992, "learning_rate": 0.00010811095505617979, "loss": 0.8712, "step": 3080 }, { "epoch": 0.054249547920433995, "grad_norm": 0.08801223964664094, "learning_rate": 0.0001084620786516854, "loss": 0.8682, "step": 3090 }, { "epoch": 0.0544251128004354, "grad_norm": 0.12172961077070571, "learning_rate": 0.00010881320224719103, "loss": 0.8699, "step": 3100 }, { "epoch": 0.054600677680436804, "grad_norm": 0.08241615340468754, "learning_rate": 0.00010916432584269663, "loss": 0.8674, "step": 3110 }, { "epoch": 0.05477624256043821, "grad_norm": 0.0865794758965367, "learning_rate": 0.00010951544943820224, "loss": 0.8822, "step": 3120 }, { "epoch": 0.054951807440439614, "grad_norm": 0.11719867689285764, "learning_rate": 0.00010986657303370787, "loss": 0.8656, "step": 3130 }, { "epoch": 0.05512737232044102, "grad_norm": 0.0766996241800066, "learning_rate": 0.00011021769662921348, "loss": 0.8705, "step": 3140 }, { "epoch": 0.05530293720044242, "grad_norm": 0.083148551011847, "learning_rate": 0.00011056882022471911, "loss": 0.8699, "step": 3150 }, { "epoch": 0.05547850208044383, "grad_norm": 0.09156534426029721, "learning_rate": 0.00011091994382022472, "loss": 0.8627, "step": 3160 }, { "epoch": 0.05565406696044523, "grad_norm": 0.07732744747798344, "learning_rate": 0.00011127106741573035, "loss": 0.8655, "step": 3170 }, { "epoch": 0.05582963184044664, "grad_norm": 0.07871542809557351, "learning_rate": 0.00011162219101123597, "loss": 0.8646, "step": 3180 }, { "epoch": 0.05600519672044804, "grad_norm": 0.06860547571216925, "learning_rate": 0.00011197331460674158, "loss": 0.8725, "step": 3190 }, { "epoch": 0.05618076160044944, "grad_norm": 0.09898952493015431, "learning_rate": 0.00011232443820224721, "loss": 0.8697, "step": 3200 }, { "epoch": 0.05635632648045085, "grad_norm": 0.10503791319987942, "learning_rate": 0.00011267556179775281, "loss": 0.8728, "step": 3210 }, { "epoch": 0.05653189136045225, "grad_norm": 0.113397681516741, "learning_rate": 0.00011302668539325842, "loss": 0.8692, "step": 3220 }, { "epoch": 0.05670745624045366, "grad_norm": 0.14823671882268247, "learning_rate": 0.00011337780898876405, "loss": 0.871, "step": 3230 }, { "epoch": 0.05688302112045506, "grad_norm": 0.08112289479811689, "learning_rate": 0.00011372893258426966, "loss": 0.8683, "step": 3240 }, { "epoch": 0.05705858600045647, "grad_norm": 0.06459786081889814, "learning_rate": 0.00011408005617977529, "loss": 0.8673, "step": 3250 }, { "epoch": 0.05723415088045787, "grad_norm": 0.09265222017632906, "learning_rate": 0.0001144311797752809, "loss": 0.869, "step": 3260 }, { "epoch": 0.05740971576045928, "grad_norm": 0.07766313734105677, "learning_rate": 0.00011478230337078653, "loss": 0.8756, "step": 3270 }, { "epoch": 0.05758528064046068, "grad_norm": 0.061711919032600045, "learning_rate": 0.00011513342696629215, "loss": 0.8662, "step": 3280 }, { "epoch": 0.05776084552046209, "grad_norm": 0.08438941281492143, "learning_rate": 0.00011548455056179777, "loss": 0.8728, "step": 3290 }, { "epoch": 0.05793641040046349, "grad_norm": 0.08242077296391899, "learning_rate": 0.00011583567415730339, "loss": 0.8586, "step": 3300 }, { "epoch": 0.0581119752804649, "grad_norm": 0.0924121867789103, "learning_rate": 0.00011618679775280899, "loss": 0.8731, "step": 3310 }, { "epoch": 0.0582875401604663, "grad_norm": 0.12274655377056946, "learning_rate": 0.0001165379213483146, "loss": 0.8644, "step": 3320 }, { "epoch": 0.05846310504046771, "grad_norm": 0.11344404884514878, "learning_rate": 0.00011688904494382023, "loss": 0.8673, "step": 3330 }, { "epoch": 0.05863866992046911, "grad_norm": 0.10983789543228578, "learning_rate": 0.00011724016853932584, "loss": 0.8707, "step": 3340 }, { "epoch": 0.058814234800470516, "grad_norm": 0.08721443834475422, "learning_rate": 0.00011759129213483147, "loss": 0.8695, "step": 3350 }, { "epoch": 0.05898979968047192, "grad_norm": 0.07856964717018013, "learning_rate": 0.00011794241573033708, "loss": 0.8685, "step": 3360 }, { "epoch": 0.059165364560473326, "grad_norm": 0.08079654160183043, "learning_rate": 0.00011829353932584271, "loss": 0.8669, "step": 3370 }, { "epoch": 0.05934092944047473, "grad_norm": 0.07066320977870835, "learning_rate": 0.00011864466292134832, "loss": 0.8656, "step": 3380 }, { "epoch": 0.059516494320476135, "grad_norm": 0.08974000064477358, "learning_rate": 0.00011899578651685395, "loss": 0.868, "step": 3390 }, { "epoch": 0.059692059200477536, "grad_norm": 0.09442930657707849, "learning_rate": 0.00011934691011235957, "loss": 0.8713, "step": 3400 }, { "epoch": 0.059867624080478944, "grad_norm": 0.09146779738026516, "learning_rate": 0.00011969803370786517, "loss": 0.8618, "step": 3410 }, { "epoch": 0.060043188960480345, "grad_norm": 0.06633853866889391, "learning_rate": 0.00012004915730337078, "loss": 0.8717, "step": 3420 }, { "epoch": 0.06021875384048175, "grad_norm": 0.06690691662235126, "learning_rate": 0.00012040028089887641, "loss": 0.8681, "step": 3430 }, { "epoch": 0.060394318720483155, "grad_norm": 0.06881263548470298, "learning_rate": 0.00012075140449438202, "loss": 0.8668, "step": 3440 }, { "epoch": 0.060569883600484556, "grad_norm": 0.07719744730115442, "learning_rate": 0.00012110252808988765, "loss": 0.8733, "step": 3450 }, { "epoch": 0.060745448480485964, "grad_norm": 0.07070604496656566, "learning_rate": 0.00012145365168539326, "loss": 0.8605, "step": 3460 }, { "epoch": 0.060921013360487365, "grad_norm": 0.09341392473875189, "learning_rate": 0.00012180477528089889, "loss": 0.8677, "step": 3470 }, { "epoch": 0.06109657824048877, "grad_norm": 0.0918776267489599, "learning_rate": 0.0001221558988764045, "loss": 0.8632, "step": 3480 }, { "epoch": 0.061272143120490175, "grad_norm": 0.1591291175325041, "learning_rate": 0.00012250702247191013, "loss": 0.8677, "step": 3490 }, { "epoch": 0.06144770800049158, "grad_norm": 0.08929748142315258, "learning_rate": 0.00012285814606741575, "loss": 0.8738, "step": 3500 }, { "epoch": 0.061623272880492984, "grad_norm": 0.09921196662287508, "learning_rate": 0.00012320926966292135, "loss": 0.8639, "step": 3510 }, { "epoch": 0.06179883776049439, "grad_norm": 0.06902446483157539, "learning_rate": 0.00012356039325842698, "loss": 0.868, "step": 3520 }, { "epoch": 0.06197440264049579, "grad_norm": 0.12024732140770085, "learning_rate": 0.00012391151685393258, "loss": 0.8741, "step": 3530 }, { "epoch": 0.0621499675204972, "grad_norm": 0.07097550208531664, "learning_rate": 0.0001242626404494382, "loss": 0.865, "step": 3540 }, { "epoch": 0.0623255324004986, "grad_norm": 0.08653807971129462, "learning_rate": 0.00012461376404494383, "loss": 0.8739, "step": 3550 }, { "epoch": 0.0625010972805, "grad_norm": 0.09075586971741764, "learning_rate": 0.00012496488764044946, "loss": 0.871, "step": 3560 }, { "epoch": 0.06267666216050141, "grad_norm": 0.08939137018393155, "learning_rate": 0.00012531601123595506, "loss": 0.8649, "step": 3570 }, { "epoch": 0.06285222704050282, "grad_norm": 0.1073606370412846, "learning_rate": 0.00012566713483146068, "loss": 0.8763, "step": 3580 }, { "epoch": 0.06302779192050423, "grad_norm": 0.09772752366937706, "learning_rate": 0.0001260182584269663, "loss": 0.8694, "step": 3590 }, { "epoch": 0.06320335680050562, "grad_norm": 0.10046149176689853, "learning_rate": 0.00012636938202247193, "loss": 0.8666, "step": 3600 }, { "epoch": 0.06337892168050703, "grad_norm": 0.09555825182241347, "learning_rate": 0.00012672050561797753, "loss": 0.8776, "step": 3610 }, { "epoch": 0.06355448656050844, "grad_norm": 0.07682773877827448, "learning_rate": 0.00012707162921348313, "loss": 0.8643, "step": 3620 }, { "epoch": 0.06373005144050985, "grad_norm": 0.08374957848003826, "learning_rate": 0.00012742275280898876, "loss": 0.8704, "step": 3630 }, { "epoch": 0.06390561632051124, "grad_norm": 0.08844890922689236, "learning_rate": 0.00012777387640449438, "loss": 0.8726, "step": 3640 }, { "epoch": 0.06408118120051265, "grad_norm": 0.09399630003450928, "learning_rate": 0.000128125, "loss": 0.8667, "step": 3650 }, { "epoch": 0.06425674608051406, "grad_norm": 0.07835107143620082, "learning_rate": 0.00012847612359550564, "loss": 0.8655, "step": 3660 }, { "epoch": 0.06443231096051547, "grad_norm": 0.08024847587044613, "learning_rate": 0.00012882724719101124, "loss": 0.8677, "step": 3670 }, { "epoch": 0.06460787584051686, "grad_norm": 0.07468031044444032, "learning_rate": 0.00012917837078651686, "loss": 0.8638, "step": 3680 }, { "epoch": 0.06478344072051827, "grad_norm": 0.08474352502966452, "learning_rate": 0.0001295294943820225, "loss": 0.8719, "step": 3690 }, { "epoch": 0.06495900560051968, "grad_norm": 0.0899589308903606, "learning_rate": 0.0001298806179775281, "loss": 0.8687, "step": 3700 }, { "epoch": 0.06513457048052107, "grad_norm": 0.0860965337425579, "learning_rate": 0.00013023174157303371, "loss": 0.8704, "step": 3710 }, { "epoch": 0.06531013536052248, "grad_norm": 0.08497707086525352, "learning_rate": 0.0001305828651685393, "loss": 0.8651, "step": 3720 }, { "epoch": 0.06548570024052389, "grad_norm": 0.0634556600068835, "learning_rate": 0.00013093398876404494, "loss": 0.8692, "step": 3730 }, { "epoch": 0.0656612651205253, "grad_norm": 0.08224388864604819, "learning_rate": 0.00013128511235955057, "loss": 0.8638, "step": 3740 }, { "epoch": 0.06583683000052669, "grad_norm": 0.07870191128854613, "learning_rate": 0.0001316362359550562, "loss": 0.8616, "step": 3750 }, { "epoch": 0.0660123948805281, "grad_norm": 0.08332245595149168, "learning_rate": 0.00013198735955056182, "loss": 0.8661, "step": 3760 }, { "epoch": 0.0661879597605295, "grad_norm": 0.08866736905289187, "learning_rate": 0.00013233848314606742, "loss": 0.8708, "step": 3770 }, { "epoch": 0.06636352464053091, "grad_norm": 0.07721877461615047, "learning_rate": 0.00013268960674157304, "loss": 0.874, "step": 3780 }, { "epoch": 0.06653908952053231, "grad_norm": 0.09724981795207698, "learning_rate": 0.00013304073033707867, "loss": 0.8749, "step": 3790 }, { "epoch": 0.06671465440053372, "grad_norm": 0.11893054087661478, "learning_rate": 0.00013339185393258427, "loss": 0.8755, "step": 3800 }, { "epoch": 0.06689021928053512, "grad_norm": 0.10137589262985941, "learning_rate": 0.0001337429775280899, "loss": 0.8697, "step": 3810 }, { "epoch": 0.06706578416053653, "grad_norm": 0.07216692843322385, "learning_rate": 0.0001340941011235955, "loss": 0.8695, "step": 3820 }, { "epoch": 0.06724134904053793, "grad_norm": 0.06748229359881665, "learning_rate": 0.00013444522471910112, "loss": 0.8731, "step": 3830 }, { "epoch": 0.06741691392053933, "grad_norm": 0.08631753256432514, "learning_rate": 0.00013479634831460675, "loss": 0.8666, "step": 3840 }, { "epoch": 0.06759247880054074, "grad_norm": 0.09485659253323388, "learning_rate": 0.00013514747191011237, "loss": 0.8609, "step": 3850 }, { "epoch": 0.06776804368054215, "grad_norm": 0.10023775477120957, "learning_rate": 0.000135498595505618, "loss": 0.8717, "step": 3860 }, { "epoch": 0.06794360856054354, "grad_norm": 0.0867118224133698, "learning_rate": 0.0001358497191011236, "loss": 0.8698, "step": 3870 }, { "epoch": 0.06811917344054495, "grad_norm": 0.1105233685981463, "learning_rate": 0.00013620084269662922, "loss": 0.8669, "step": 3880 }, { "epoch": 0.06829473832054636, "grad_norm": 0.0949806796508284, "learning_rate": 0.00013655196629213485, "loss": 0.8722, "step": 3890 }, { "epoch": 0.06847030320054777, "grad_norm": 0.08375678745285936, "learning_rate": 0.00013690308988764045, "loss": 0.8739, "step": 3900 }, { "epoch": 0.06864586808054916, "grad_norm": 0.0644081927936635, "learning_rate": 0.00013725421348314608, "loss": 0.8661, "step": 3910 }, { "epoch": 0.06882143296055057, "grad_norm": 0.09912616189707003, "learning_rate": 0.00013760533707865167, "loss": 0.8676, "step": 3920 }, { "epoch": 0.06899699784055198, "grad_norm": 0.12052094066945607, "learning_rate": 0.0001379564606741573, "loss": 0.8619, "step": 3930 }, { "epoch": 0.06917256272055337, "grad_norm": 0.09153774261297007, "learning_rate": 0.00013830758426966293, "loss": 0.8754, "step": 3940 }, { "epoch": 0.06934812760055478, "grad_norm": 0.0969384167605854, "learning_rate": 0.00013865870786516855, "loss": 0.8683, "step": 3950 }, { "epoch": 0.06952369248055619, "grad_norm": 0.08974666853226083, "learning_rate": 0.00013900983146067418, "loss": 0.8665, "step": 3960 }, { "epoch": 0.0696992573605576, "grad_norm": 0.09511105897158184, "learning_rate": 0.00013936095505617978, "loss": 0.8666, "step": 3970 }, { "epoch": 0.06987482224055899, "grad_norm": 0.10151448001847216, "learning_rate": 0.0001397120786516854, "loss": 0.8717, "step": 3980 }, { "epoch": 0.0700503871205604, "grad_norm": 0.08104035612694072, "learning_rate": 0.00014006320224719103, "loss": 0.8739, "step": 3990 }, { "epoch": 0.07022595200056181, "grad_norm": 0.07699135641787441, "learning_rate": 0.00014041432584269663, "loss": 0.8744, "step": 4000 }, { "epoch": 0.07040151688056322, "grad_norm": 0.07000581689066553, "learning_rate": 0.00014076544943820226, "loss": 0.86, "step": 4010 }, { "epoch": 0.07057708176056461, "grad_norm": 0.08855740751215302, "learning_rate": 0.00014111657303370786, "loss": 0.871, "step": 4020 }, { "epoch": 0.07075264664056602, "grad_norm": 0.08166610452330605, "learning_rate": 0.00014146769662921348, "loss": 0.8685, "step": 4030 }, { "epoch": 0.07092821152056743, "grad_norm": 0.11790191119551124, "learning_rate": 0.0001418188202247191, "loss": 0.8677, "step": 4040 }, { "epoch": 0.07110377640056884, "grad_norm": 0.13799654867093156, "learning_rate": 0.00014216994382022473, "loss": 0.8582, "step": 4050 }, { "epoch": 0.07127934128057023, "grad_norm": 0.07327678821618046, "learning_rate": 0.00014252106741573036, "loss": 0.8675, "step": 4060 }, { "epoch": 0.07145490616057164, "grad_norm": 0.07863410269343978, "learning_rate": 0.00014287219101123596, "loss": 0.869, "step": 4070 }, { "epoch": 0.07163047104057305, "grad_norm": 0.07542836260788668, "learning_rate": 0.00014322331460674159, "loss": 0.8659, "step": 4080 }, { "epoch": 0.07180603592057445, "grad_norm": 0.11071694874391441, "learning_rate": 0.0001435744382022472, "loss": 0.8711, "step": 4090 }, { "epoch": 0.07198160080057585, "grad_norm": 0.07668577818265196, "learning_rate": 0.0001439255617977528, "loss": 0.8641, "step": 4100 }, { "epoch": 0.07215716568057726, "grad_norm": 0.06880767046653036, "learning_rate": 0.00014427668539325844, "loss": 0.8684, "step": 4110 }, { "epoch": 0.07233273056057866, "grad_norm": 0.09301324305077331, "learning_rate": 0.00014462780898876404, "loss": 0.8779, "step": 4120 }, { "epoch": 0.07250829544058007, "grad_norm": 0.0926534904459085, "learning_rate": 0.00014497893258426966, "loss": 0.8666, "step": 4130 }, { "epoch": 0.07268386032058147, "grad_norm": 0.10438820316693159, "learning_rate": 0.0001453300561797753, "loss": 0.8676, "step": 4140 }, { "epoch": 0.07285942520058288, "grad_norm": 0.10819014876339289, "learning_rate": 0.00014568117977528091, "loss": 0.8747, "step": 4150 }, { "epoch": 0.07303499008058428, "grad_norm": 0.07580563707929784, "learning_rate": 0.00014603230337078654, "loss": 0.8723, "step": 4160 }, { "epoch": 0.07321055496058568, "grad_norm": 0.07846311969047703, "learning_rate": 0.00014638342696629214, "loss": 0.8662, "step": 4170 }, { "epoch": 0.07338611984058709, "grad_norm": 0.07983271774368964, "learning_rate": 0.00014673455056179777, "loss": 0.863, "step": 4180 }, { "epoch": 0.0735616847205885, "grad_norm": 0.0825969791220882, "learning_rate": 0.0001470856741573034, "loss": 0.8658, "step": 4190 }, { "epoch": 0.0737372496005899, "grad_norm": 0.07234958039630493, "learning_rate": 0.000147436797752809, "loss": 0.8682, "step": 4200 }, { "epoch": 0.0739128144805913, "grad_norm": 0.08174310148214484, "learning_rate": 0.00014778792134831462, "loss": 0.8691, "step": 4210 }, { "epoch": 0.0740883793605927, "grad_norm": 0.07186689397781615, "learning_rate": 0.00014813904494382022, "loss": 0.8676, "step": 4220 }, { "epoch": 0.07426394424059411, "grad_norm": 0.08461427893764355, "learning_rate": 0.00014849016853932584, "loss": 0.8649, "step": 4230 }, { "epoch": 0.07443950912059552, "grad_norm": 0.06589368503481277, "learning_rate": 0.00014884129213483147, "loss": 0.8726, "step": 4240 }, { "epoch": 0.07461507400059691, "grad_norm": 0.06367472494772826, "learning_rate": 0.0001491924157303371, "loss": 0.8709, "step": 4250 }, { "epoch": 0.07479063888059832, "grad_norm": 0.09853738913632547, "learning_rate": 0.0001495435393258427, "loss": 0.8581, "step": 4260 }, { "epoch": 0.07496620376059973, "grad_norm": 0.10839327616134614, "learning_rate": 0.00014989466292134832, "loss": 0.8673, "step": 4270 }, { "epoch": 0.07514176864060114, "grad_norm": 0.08450106202556797, "learning_rate": 0.00015024578651685395, "loss": 0.8717, "step": 4280 }, { "epoch": 0.07531733352060253, "grad_norm": 0.09038532471554327, "learning_rate": 0.00015059691011235957, "loss": 0.8656, "step": 4290 }, { "epoch": 0.07549289840060394, "grad_norm": 0.08629338423113897, "learning_rate": 0.00015094803370786517, "loss": 0.8682, "step": 4300 }, { "epoch": 0.07566846328060535, "grad_norm": 0.0750463548101676, "learning_rate": 0.0001512991573033708, "loss": 0.8627, "step": 4310 }, { "epoch": 0.07584402816060676, "grad_norm": 0.07337608217570649, "learning_rate": 0.0001516502808988764, "loss": 0.8785, "step": 4320 }, { "epoch": 0.07601959304060815, "grad_norm": 0.08405988308033217, "learning_rate": 0.00015200140449438202, "loss": 0.867, "step": 4330 }, { "epoch": 0.07619515792060956, "grad_norm": 0.0701783258009735, "learning_rate": 0.00015235252808988765, "loss": 0.8662, "step": 4340 }, { "epoch": 0.07637072280061097, "grad_norm": 0.08586204400052112, "learning_rate": 0.00015270365168539328, "loss": 0.883, "step": 4350 }, { "epoch": 0.07654628768061238, "grad_norm": 0.0769610592561428, "learning_rate": 0.00015305477528089888, "loss": 0.8637, "step": 4360 }, { "epoch": 0.07672185256061377, "grad_norm": 0.07998400035283786, "learning_rate": 0.0001534058988764045, "loss": 0.8614, "step": 4370 }, { "epoch": 0.07689741744061518, "grad_norm": 0.10840681872396715, "learning_rate": 0.00015375702247191013, "loss": 0.8676, "step": 4380 }, { "epoch": 0.07707298232061659, "grad_norm": 0.09646002316237622, "learning_rate": 0.00015410814606741575, "loss": 0.8688, "step": 4390 }, { "epoch": 0.077248547200618, "grad_norm": 0.07038237154544184, "learning_rate": 0.00015445926966292135, "loss": 0.8589, "step": 4400 }, { "epoch": 0.07742411208061939, "grad_norm": 0.0854281808452838, "learning_rate": 0.00015481039325842698, "loss": 0.8661, "step": 4410 }, { "epoch": 0.0775996769606208, "grad_norm": 0.07270387940775015, "learning_rate": 0.00015516151685393258, "loss": 0.8695, "step": 4420 }, { "epoch": 0.0777752418406222, "grad_norm": 0.07818967782742531, "learning_rate": 0.0001555126404494382, "loss": 0.8672, "step": 4430 }, { "epoch": 0.0779508067206236, "grad_norm": 0.10054700292155842, "learning_rate": 0.00015586376404494383, "loss": 0.868, "step": 4440 }, { "epoch": 0.07812637160062501, "grad_norm": 0.07337802633079017, "learning_rate": 0.00015621488764044946, "loss": 0.8685, "step": 4450 }, { "epoch": 0.07830193648062642, "grad_norm": 0.07292774031953334, "learning_rate": 0.00015656601123595506, "loss": 0.8703, "step": 4460 }, { "epoch": 0.07847750136062782, "grad_norm": 0.09110409688313115, "learning_rate": 0.00015691713483146068, "loss": 0.8637, "step": 4470 }, { "epoch": 0.07865306624062922, "grad_norm": 0.10994224724783667, "learning_rate": 0.0001572682584269663, "loss": 0.8642, "step": 4480 }, { "epoch": 0.07882863112063063, "grad_norm": 0.07514051930797577, "learning_rate": 0.00015761938202247193, "loss": 0.876, "step": 4490 }, { "epoch": 0.07900419600063203, "grad_norm": 0.08473126096815146, "learning_rate": 0.00015797050561797753, "loss": 0.8694, "step": 4500 }, { "epoch": 0.07917976088063344, "grad_norm": 0.05889963636699825, "learning_rate": 0.00015832162921348316, "loss": 0.8693, "step": 4510 }, { "epoch": 0.07935532576063484, "grad_norm": 0.09644216724875465, "learning_rate": 0.00015867275280898876, "loss": 0.868, "step": 4520 }, { "epoch": 0.07953089064063625, "grad_norm": 0.08330160201996087, "learning_rate": 0.00015902387640449439, "loss": 0.8721, "step": 4530 }, { "epoch": 0.07970645552063765, "grad_norm": 0.0771144573061966, "learning_rate": 0.000159375, "loss": 0.8703, "step": 4540 }, { "epoch": 0.07988202040063906, "grad_norm": 0.07320722269239563, "learning_rate": 0.00015972612359550564, "loss": 0.864, "step": 4550 }, { "epoch": 0.08005758528064046, "grad_norm": 0.07410639814814954, "learning_rate": 0.00016007724719101124, "loss": 0.8689, "step": 4560 }, { "epoch": 0.08023315016064186, "grad_norm": 0.08436512009270412, "learning_rate": 0.00016042837078651686, "loss": 0.8646, "step": 4570 }, { "epoch": 0.08040871504064327, "grad_norm": 0.08810186765455941, "learning_rate": 0.0001607794943820225, "loss": 0.8697, "step": 4580 }, { "epoch": 0.08058427992064468, "grad_norm": 0.10799389452800459, "learning_rate": 0.0001611306179775281, "loss": 0.8657, "step": 4590 }, { "epoch": 0.08075984480064607, "grad_norm": 0.07023168870357091, "learning_rate": 0.00016148174157303371, "loss": 0.8694, "step": 4600 }, { "epoch": 0.08093540968064748, "grad_norm": 0.1087335669987877, "learning_rate": 0.00016183286516853931, "loss": 0.8647, "step": 4610 }, { "epoch": 0.08111097456064889, "grad_norm": 0.07648697691679036, "learning_rate": 0.00016218398876404494, "loss": 0.8732, "step": 4620 }, { "epoch": 0.0812865394406503, "grad_norm": 0.08907095196687016, "learning_rate": 0.00016253511235955057, "loss": 0.8664, "step": 4630 }, { "epoch": 0.0814621043206517, "grad_norm": 0.08090337098455115, "learning_rate": 0.0001628862359550562, "loss": 0.8735, "step": 4640 }, { "epoch": 0.0816376692006531, "grad_norm": 0.07864039379613462, "learning_rate": 0.00016323735955056182, "loss": 0.8729, "step": 4650 }, { "epoch": 0.08181323408065451, "grad_norm": 0.11340875558156789, "learning_rate": 0.00016358848314606742, "loss": 0.8699, "step": 4660 }, { "epoch": 0.0819887989606559, "grad_norm": 0.07735852976232371, "learning_rate": 0.00016393960674157304, "loss": 0.8602, "step": 4670 }, { "epoch": 0.08216436384065731, "grad_norm": 0.08806413794618977, "learning_rate": 0.00016429073033707867, "loss": 0.8678, "step": 4680 }, { "epoch": 0.08233992872065872, "grad_norm": 0.09118430042747845, "learning_rate": 0.00016464185393258427, "loss": 0.862, "step": 4690 }, { "epoch": 0.08251549360066013, "grad_norm": 0.07886876913844895, "learning_rate": 0.0001649929775280899, "loss": 0.8578, "step": 4700 }, { "epoch": 0.08269105848066152, "grad_norm": 0.09063438315459828, "learning_rate": 0.0001653441011235955, "loss": 0.8628, "step": 4710 }, { "epoch": 0.08286662336066293, "grad_norm": 0.10755783807824873, "learning_rate": 0.00016569522471910112, "loss": 0.8722, "step": 4720 }, { "epoch": 0.08304218824066434, "grad_norm": 0.06498041183357131, "learning_rate": 0.00016604634831460675, "loss": 0.8632, "step": 4730 }, { "epoch": 0.08321775312066575, "grad_norm": 0.07473440843261468, "learning_rate": 0.00016639747191011237, "loss": 0.865, "step": 4740 }, { "epoch": 0.08339331800066714, "grad_norm": 0.148929785661174, "learning_rate": 0.000166748595505618, "loss": 0.8677, "step": 4750 }, { "epoch": 0.08356888288066855, "grad_norm": 0.06992532896317065, "learning_rate": 0.0001670997191011236, "loss": 0.8676, "step": 4760 }, { "epoch": 0.08374444776066996, "grad_norm": 0.08049927042262585, "learning_rate": 0.00016745084269662922, "loss": 0.8628, "step": 4770 }, { "epoch": 0.08392001264067137, "grad_norm": 0.07725685529048437, "learning_rate": 0.00016780196629213485, "loss": 0.8651, "step": 4780 }, { "epoch": 0.08409557752067276, "grad_norm": 0.08866309175750381, "learning_rate": 0.00016815308988764045, "loss": 0.8649, "step": 4790 }, { "epoch": 0.08427114240067417, "grad_norm": 0.07268164801108132, "learning_rate": 0.00016850421348314608, "loss": 0.8619, "step": 4800 }, { "epoch": 0.08444670728067558, "grad_norm": 0.08583037056922549, "learning_rate": 0.00016885533707865168, "loss": 0.8691, "step": 4810 }, { "epoch": 0.08462227216067698, "grad_norm": 0.09705894012417274, "learning_rate": 0.0001692064606741573, "loss": 0.8608, "step": 4820 }, { "epoch": 0.08479783704067838, "grad_norm": 0.09740745345970428, "learning_rate": 0.00016955758426966293, "loss": 0.8686, "step": 4830 }, { "epoch": 0.08497340192067979, "grad_norm": 0.06227523777486907, "learning_rate": 0.00016990870786516855, "loss": 0.8724, "step": 4840 }, { "epoch": 0.0851489668006812, "grad_norm": 0.07892537529720367, "learning_rate": 0.00017025983146067418, "loss": 0.8685, "step": 4850 }, { "epoch": 0.0853245316806826, "grad_norm": 0.07015226131016203, "learning_rate": 0.00017061095505617978, "loss": 0.8661, "step": 4860 }, { "epoch": 0.085500096560684, "grad_norm": 0.10831127654925424, "learning_rate": 0.0001709620786516854, "loss": 0.8781, "step": 4870 }, { "epoch": 0.0856756614406854, "grad_norm": 0.07290902209263958, "learning_rate": 0.00017131320224719103, "loss": 0.8664, "step": 4880 }, { "epoch": 0.08585122632068681, "grad_norm": 0.060254225502320215, "learning_rate": 0.00017166432584269663, "loss": 0.8595, "step": 4890 }, { "epoch": 0.08602679120068821, "grad_norm": 0.09287350217820713, "learning_rate": 0.00017201544943820226, "loss": 0.8713, "step": 4900 }, { "epoch": 0.08620235608068962, "grad_norm": 0.08081451244307865, "learning_rate": 0.00017236657303370786, "loss": 0.8618, "step": 4910 }, { "epoch": 0.08637792096069102, "grad_norm": 0.08625108801626413, "learning_rate": 0.00017271769662921348, "loss": 0.8703, "step": 4920 }, { "epoch": 0.08655348584069243, "grad_norm": 0.06292383949617689, "learning_rate": 0.0001730688202247191, "loss": 0.8683, "step": 4930 }, { "epoch": 0.08672905072069383, "grad_norm": 0.06638756545664112, "learning_rate": 0.00017341994382022473, "loss": 0.8645, "step": 4940 }, { "epoch": 0.08690461560069523, "grad_norm": 0.09739006317211192, "learning_rate": 0.00017377106741573036, "loss": 0.8711, "step": 4950 }, { "epoch": 0.08708018048069664, "grad_norm": 0.06535366262039126, "learning_rate": 0.00017412219101123596, "loss": 0.8644, "step": 4960 }, { "epoch": 0.08725574536069805, "grad_norm": 0.07121829780754685, "learning_rate": 0.00017447331460674159, "loss": 0.8683, "step": 4970 }, { "epoch": 0.08743131024069944, "grad_norm": 0.06474340894680934, "learning_rate": 0.0001748244382022472, "loss": 0.8634, "step": 4980 }, { "epoch": 0.08760687512070085, "grad_norm": 0.12692213524882914, "learning_rate": 0.0001751755617977528, "loss": 0.8623, "step": 4990 }, { "epoch": 0.08778244000070226, "grad_norm": 0.0950664795038832, "learning_rate": 0.00017552668539325844, "loss": 0.8699, "step": 5000 }, { "epoch": 0.08795800488070367, "grad_norm": 0.07260680382355068, "learning_rate": 0.00017587780898876404, "loss": 0.8698, "step": 5010 }, { "epoch": 0.08813356976070506, "grad_norm": 0.08006083335946487, "learning_rate": 0.00017622893258426966, "loss": 0.8662, "step": 5020 }, { "epoch": 0.08830913464070647, "grad_norm": 0.08955359871919073, "learning_rate": 0.0001765800561797753, "loss": 0.8582, "step": 5030 }, { "epoch": 0.08848469952070788, "grad_norm": 0.0707756510968693, "learning_rate": 0.00017693117977528092, "loss": 0.866, "step": 5040 }, { "epoch": 0.08866026440070929, "grad_norm": 0.07998471007407254, "learning_rate": 0.00017728230337078654, "loss": 0.8742, "step": 5050 }, { "epoch": 0.08883582928071068, "grad_norm": 0.08220301787851143, "learning_rate": 0.00017763342696629214, "loss": 0.8744, "step": 5060 }, { "epoch": 0.08901139416071209, "grad_norm": 0.11196912685369766, "learning_rate": 0.00017798455056179777, "loss": 0.867, "step": 5070 }, { "epoch": 0.0891869590407135, "grad_norm": 0.08960575960300038, "learning_rate": 0.0001783356741573034, "loss": 0.8706, "step": 5080 }, { "epoch": 0.0893625239207149, "grad_norm": 0.11055748637690853, "learning_rate": 0.000178686797752809, "loss": 0.8614, "step": 5090 }, { "epoch": 0.0895380888007163, "grad_norm": 0.07733886541459249, "learning_rate": 0.00017903792134831462, "loss": 0.8661, "step": 5100 }, { "epoch": 0.08971365368071771, "grad_norm": 0.07175029446649613, "learning_rate": 0.00017938904494382022, "loss": 0.8653, "step": 5110 }, { "epoch": 0.08988921856071912, "grad_norm": 0.07954521033221447, "learning_rate": 0.00017974016853932584, "loss": 0.8718, "step": 5120 }, { "epoch": 0.09006478344072052, "grad_norm": 0.0877358714596494, "learning_rate": 0.00018009129213483147, "loss": 0.8679, "step": 5130 }, { "epoch": 0.09024034832072192, "grad_norm": 0.09314894763089218, "learning_rate": 0.0001804424157303371, "loss": 0.8704, "step": 5140 }, { "epoch": 0.09041591320072333, "grad_norm": 0.10118294864548553, "learning_rate": 0.00018079353932584272, "loss": 0.8641, "step": 5150 }, { "epoch": 0.09059147808072474, "grad_norm": 0.10062214394123596, "learning_rate": 0.00018114466292134832, "loss": 0.8666, "step": 5160 }, { "epoch": 0.09076704296072613, "grad_norm": 0.08645117471089833, "learning_rate": 0.00018149578651685395, "loss": 0.8728, "step": 5170 }, { "epoch": 0.09094260784072754, "grad_norm": 0.08680755326743796, "learning_rate": 0.00018184691011235957, "loss": 0.8747, "step": 5180 }, { "epoch": 0.09111817272072895, "grad_norm": 0.0746449117078722, "learning_rate": 0.00018219803370786517, "loss": 0.8714, "step": 5190 }, { "epoch": 0.09129373760073035, "grad_norm": 0.08648526637792477, "learning_rate": 0.0001825491573033708, "loss": 0.8622, "step": 5200 }, { "epoch": 0.09146930248073175, "grad_norm": 0.13431745028293973, "learning_rate": 0.0001829002808988764, "loss": 0.8594, "step": 5210 }, { "epoch": 0.09164486736073316, "grad_norm": 0.06981457627834535, "learning_rate": 0.00018325140449438202, "loss": 0.8681, "step": 5220 }, { "epoch": 0.09182043224073456, "grad_norm": 0.1130522452706824, "learning_rate": 0.00018360252808988765, "loss": 0.8688, "step": 5230 }, { "epoch": 0.09199599712073597, "grad_norm": 0.10309214926270541, "learning_rate": 0.00018395365168539328, "loss": 0.8657, "step": 5240 }, { "epoch": 0.09217156200073737, "grad_norm": 0.09141805361926296, "learning_rate": 0.00018430477528089888, "loss": 0.8599, "step": 5250 }, { "epoch": 0.09234712688073878, "grad_norm": 0.10325173008054479, "learning_rate": 0.0001846558988764045, "loss": 0.8594, "step": 5260 }, { "epoch": 0.09252269176074018, "grad_norm": 0.08491881491829187, "learning_rate": 0.00018500702247191013, "loss": 0.8688, "step": 5270 }, { "epoch": 0.09269825664074159, "grad_norm": 0.08887562234822251, "learning_rate": 0.00018535814606741575, "loss": 0.8583, "step": 5280 }, { "epoch": 0.09287382152074299, "grad_norm": 0.08426215291625319, "learning_rate": 0.00018570926966292135, "loss": 0.8646, "step": 5290 }, { "epoch": 0.0930493864007444, "grad_norm": 0.0827792330446448, "learning_rate": 0.00018606039325842698, "loss": 0.864, "step": 5300 }, { "epoch": 0.0932249512807458, "grad_norm": 0.07766589011550949, "learning_rate": 0.00018641151685393258, "loss": 0.8668, "step": 5310 }, { "epoch": 0.09340051616074721, "grad_norm": 0.09520977824458415, "learning_rate": 0.0001867626404494382, "loss": 0.871, "step": 5320 }, { "epoch": 0.0935760810407486, "grad_norm": 0.06359435360186315, "learning_rate": 0.00018711376404494383, "loss": 0.8651, "step": 5330 }, { "epoch": 0.09375164592075001, "grad_norm": 0.06989474927666056, "learning_rate": 0.00018746488764044946, "loss": 0.869, "step": 5340 }, { "epoch": 0.09392721080075142, "grad_norm": 0.07663947538179294, "learning_rate": 0.00018781601123595506, "loss": 0.8668, "step": 5350 }, { "epoch": 0.09410277568075283, "grad_norm": 0.08971002887767697, "learning_rate": 0.00018816713483146068, "loss": 0.8625, "step": 5360 }, { "epoch": 0.09427834056075422, "grad_norm": 0.10046597281574413, "learning_rate": 0.0001885182584269663, "loss": 0.8603, "step": 5370 }, { "epoch": 0.09445390544075563, "grad_norm": 0.0826615082943464, "learning_rate": 0.00018886938202247194, "loss": 0.8662, "step": 5380 }, { "epoch": 0.09462947032075704, "grad_norm": 0.12038149841092534, "learning_rate": 0.00018922050561797753, "loss": 0.8704, "step": 5390 }, { "epoch": 0.09480503520075843, "grad_norm": 0.07572646972459902, "learning_rate": 0.00018957162921348316, "loss": 0.8683, "step": 5400 }, { "epoch": 0.09498060008075984, "grad_norm": 0.08678404926154863, "learning_rate": 0.00018992275280898876, "loss": 0.867, "step": 5410 }, { "epoch": 0.09515616496076125, "grad_norm": 0.0770515566215959, "learning_rate": 0.00019027387640449439, "loss": 0.8656, "step": 5420 }, { "epoch": 0.09533172984076266, "grad_norm": 0.07366276219931929, "learning_rate": 0.000190625, "loss": 0.8649, "step": 5430 }, { "epoch": 0.09550729472076405, "grad_norm": 0.09576967283134216, "learning_rate": 0.00019097612359550564, "loss": 0.8704, "step": 5440 }, { "epoch": 0.09568285960076546, "grad_norm": 0.10867746900587937, "learning_rate": 0.00019132724719101124, "loss": 0.8694, "step": 5450 }, { "epoch": 0.09585842448076687, "grad_norm": 0.09173761799700574, "learning_rate": 0.00019167837078651686, "loss": 0.8678, "step": 5460 }, { "epoch": 0.09603398936076828, "grad_norm": 0.08497574132058541, "learning_rate": 0.0001920294943820225, "loss": 0.8632, "step": 5470 }, { "epoch": 0.09620955424076967, "grad_norm": 0.10063514854313432, "learning_rate": 0.0001923806179775281, "loss": 0.8569, "step": 5480 }, { "epoch": 0.09638511912077108, "grad_norm": 0.07634828188980673, "learning_rate": 0.00019273174157303372, "loss": 0.8705, "step": 5490 }, { "epoch": 0.09656068400077249, "grad_norm": 0.08062230045213784, "learning_rate": 0.00019308286516853934, "loss": 0.8655, "step": 5500 }, { "epoch": 0.0967362488807739, "grad_norm": 0.07785635604700651, "learning_rate": 0.00019343398876404494, "loss": 0.869, "step": 5510 }, { "epoch": 0.09691181376077529, "grad_norm": 0.08388067460689601, "learning_rate": 0.00019378511235955057, "loss": 0.8711, "step": 5520 }, { "epoch": 0.0970873786407767, "grad_norm": 0.08094237436290566, "learning_rate": 0.0001941362359550562, "loss": 0.858, "step": 5530 }, { "epoch": 0.0972629435207781, "grad_norm": 0.07257938176280468, "learning_rate": 0.00019448735955056182, "loss": 0.8656, "step": 5540 }, { "epoch": 0.09743850840077951, "grad_norm": 0.0660216945109028, "learning_rate": 0.00019483848314606742, "loss": 0.874, "step": 5550 }, { "epoch": 0.09761407328078091, "grad_norm": 0.07336033031504356, "learning_rate": 0.00019518960674157304, "loss": 0.8693, "step": 5560 }, { "epoch": 0.09778963816078232, "grad_norm": 0.11475596532129542, "learning_rate": 0.00019554073033707867, "loss": 0.866, "step": 5570 }, { "epoch": 0.09796520304078372, "grad_norm": 0.06676005376026309, "learning_rate": 0.00019589185393258427, "loss": 0.8668, "step": 5580 }, { "epoch": 0.09814076792078513, "grad_norm": 0.11670832249241093, "learning_rate": 0.0001962429775280899, "loss": 0.8646, "step": 5590 }, { "epoch": 0.09831633280078653, "grad_norm": 0.0946479465102158, "learning_rate": 0.00019659410112359552, "loss": 0.8607, "step": 5600 }, { "epoch": 0.09849189768078793, "grad_norm": 0.0903312358020182, "learning_rate": 0.00019694522471910112, "loss": 0.8725, "step": 5610 }, { "epoch": 0.09866746256078934, "grad_norm": 0.0892184536704075, "learning_rate": 0.00019729634831460675, "loss": 0.865, "step": 5620 }, { "epoch": 0.09884302744079075, "grad_norm": 0.10496426530372611, "learning_rate": 0.00019764747191011237, "loss": 0.8677, "step": 5630 }, { "epoch": 0.09901859232079215, "grad_norm": 0.10170756382432672, "learning_rate": 0.000197998595505618, "loss": 0.8638, "step": 5640 }, { "epoch": 0.09919415720079355, "grad_norm": 0.07844715045945563, "learning_rate": 0.0001983497191011236, "loss": 0.8673, "step": 5650 }, { "epoch": 0.09936972208079496, "grad_norm": 0.08308166854934049, "learning_rate": 0.00019870084269662923, "loss": 0.8594, "step": 5660 }, { "epoch": 0.09954528696079636, "grad_norm": 0.07078826247147489, "learning_rate": 0.00019905196629213485, "loss": 0.8689, "step": 5670 }, { "epoch": 0.09972085184079776, "grad_norm": 0.07122424794099147, "learning_rate": 0.00019940308988764045, "loss": 0.8686, "step": 5680 }, { "epoch": 0.09989641672079917, "grad_norm": 0.0662598705125894, "learning_rate": 0.00019975421348314608, "loss": 0.8585, "step": 5690 }, { "epoch": 0.10007198160080058, "grad_norm": 0.08115052049442587, "learning_rate": 0.00019999999994355433, "loss": 0.8687, "step": 5700 }, { "epoch": 0.10024754648080197, "grad_norm": 0.06120919579659045, "learning_rate": 0.00019999999894007513, "loss": 0.8656, "step": 5710 }, { "epoch": 0.10042311136080338, "grad_norm": 0.07271076099270422, "learning_rate": 0.000199999996682247, "loss": 0.8655, "step": 5720 }, { "epoch": 0.10059867624080479, "grad_norm": 0.07008063612434355, "learning_rate": 0.00019999999317006991, "loss": 0.8674, "step": 5730 }, { "epoch": 0.1007742411208062, "grad_norm": 0.08207146951118063, "learning_rate": 0.00019999998840354398, "loss": 0.8715, "step": 5740 }, { "epoch": 0.1009498060008076, "grad_norm": 0.11793559162189993, "learning_rate": 0.00019999998238266918, "loss": 0.861, "step": 5750 }, { "epoch": 0.101125370880809, "grad_norm": 0.09656625220519839, "learning_rate": 0.00019999997510744563, "loss": 0.872, "step": 5760 }, { "epoch": 0.10130093576081041, "grad_norm": 0.08344523597212511, "learning_rate": 0.00019999996657787348, "loss": 0.8655, "step": 5770 }, { "epoch": 0.10147650064081182, "grad_norm": 0.087657217897348, "learning_rate": 0.00019999995679395274, "loss": 0.8588, "step": 5780 }, { "epoch": 0.10165206552081321, "grad_norm": 0.08669015655096904, "learning_rate": 0.00019999994575568356, "loss": 0.8706, "step": 5790 }, { "epoch": 0.10182763040081462, "grad_norm": 0.08540805795078366, "learning_rate": 0.00019999993346306612, "loss": 0.8548, "step": 5800 }, { "epoch": 0.10200319528081603, "grad_norm": 0.0855003350024982, "learning_rate": 0.0001999999199161005, "loss": 0.8714, "step": 5810 }, { "epoch": 0.10217876016081744, "grad_norm": 0.14239010780553799, "learning_rate": 0.000199999905114787, "loss": 0.868, "step": 5820 }, { "epoch": 0.10235432504081883, "grad_norm": 0.07822582490916924, "learning_rate": 0.00019999988905912566, "loss": 0.8644, "step": 5830 }, { "epoch": 0.10252988992082024, "grad_norm": 0.05840941753870967, "learning_rate": 0.00019999987174911682, "loss": 0.872, "step": 5840 }, { "epoch": 0.10270545480082165, "grad_norm": 0.0777612893715757, "learning_rate": 0.00019999985318476057, "loss": 0.8699, "step": 5850 }, { "epoch": 0.10288101968082305, "grad_norm": 0.07908561980487473, "learning_rate": 0.0001999998333660572, "loss": 0.8641, "step": 5860 }, { "epoch": 0.10305658456082445, "grad_norm": 0.09908934997538962, "learning_rate": 0.00019999981229300702, "loss": 0.8616, "step": 5870 }, { "epoch": 0.10323214944082586, "grad_norm": 0.09482973389235208, "learning_rate": 0.00019999978996561023, "loss": 0.8682, "step": 5880 }, { "epoch": 0.10340771432082727, "grad_norm": 0.07115887895206233, "learning_rate": 0.00019999976638386711, "loss": 0.8692, "step": 5890 }, { "epoch": 0.10358327920082866, "grad_norm": 0.06499311559529795, "learning_rate": 0.000199999741547778, "loss": 0.8792, "step": 5900 }, { "epoch": 0.10375884408083007, "grad_norm": 0.09612461730681014, "learning_rate": 0.00019999971545734318, "loss": 0.8658, "step": 5910 }, { "epoch": 0.10393440896083148, "grad_norm": 0.08110422904202048, "learning_rate": 0.000199999688112563, "loss": 0.8707, "step": 5920 }, { "epoch": 0.10410997384083288, "grad_norm": 0.10369390784473606, "learning_rate": 0.00019999965951343778, "loss": 0.8699, "step": 5930 }, { "epoch": 0.10428553872083428, "grad_norm": 0.09699533552971036, "learning_rate": 0.00019999962965996788, "loss": 0.8646, "step": 5940 }, { "epoch": 0.10446110360083569, "grad_norm": 0.07308862974129239, "learning_rate": 0.00019999959855215374, "loss": 0.8655, "step": 5950 }, { "epoch": 0.1046366684808371, "grad_norm": 0.05968577837626002, "learning_rate": 0.00019999956618999568, "loss": 0.8656, "step": 5960 }, { "epoch": 0.1048122333608385, "grad_norm": 0.0719894546627652, "learning_rate": 0.0001999995325734942, "loss": 0.8764, "step": 5970 }, { "epoch": 0.1049877982408399, "grad_norm": 0.08471584877753886, "learning_rate": 0.00019999949770264962, "loss": 0.8685, "step": 5980 }, { "epoch": 0.1051633631208413, "grad_norm": 0.07196709755639648, "learning_rate": 0.00019999946157746244, "loss": 0.8646, "step": 5990 }, { "epoch": 0.10533892800084271, "grad_norm": 0.08281928995007487, "learning_rate": 0.00019999942419793312, "loss": 0.8659, "step": 6000 }, { "epoch": 0.10551449288084412, "grad_norm": 0.08451846739330868, "learning_rate": 0.00019999938556406212, "loss": 0.8714, "step": 6010 }, { "epoch": 0.10569005776084552, "grad_norm": 0.08684304361475265, "learning_rate": 0.00019999934567584994, "loss": 0.8692, "step": 6020 }, { "epoch": 0.10586562264084692, "grad_norm": 0.10077702584925932, "learning_rate": 0.00019999930453329704, "loss": 0.8639, "step": 6030 }, { "epoch": 0.10604118752084833, "grad_norm": 0.07492763032279787, "learning_rate": 0.00019999926213640402, "loss": 0.8659, "step": 6040 }, { "epoch": 0.10621675240084974, "grad_norm": 0.09287087464818368, "learning_rate": 0.00019999921848517136, "loss": 0.8661, "step": 6050 }, { "epoch": 0.10639231728085113, "grad_norm": 0.080949995790025, "learning_rate": 0.00019999917357959964, "loss": 0.8668, "step": 6060 }, { "epoch": 0.10656788216085254, "grad_norm": 0.05809611936487027, "learning_rate": 0.0001999991274196894, "loss": 0.8687, "step": 6070 }, { "epoch": 0.10674344704085395, "grad_norm": 0.0746887531457463, "learning_rate": 0.00019999908000544126, "loss": 0.8582, "step": 6080 }, { "epoch": 0.10691901192085536, "grad_norm": 0.09318559064731355, "learning_rate": 0.0001999990313368558, "loss": 0.8723, "step": 6090 }, { "epoch": 0.10709457680085675, "grad_norm": 0.08538566387847724, "learning_rate": 0.0001999989814139336, "loss": 0.8649, "step": 6100 }, { "epoch": 0.10727014168085816, "grad_norm": 0.09452315098754602, "learning_rate": 0.00019999893023667535, "loss": 0.8688, "step": 6110 }, { "epoch": 0.10744570656085957, "grad_norm": 0.07950987561053775, "learning_rate": 0.00019999887780508172, "loss": 0.871, "step": 6120 }, { "epoch": 0.10762127144086096, "grad_norm": 0.07096817035639305, "learning_rate": 0.00019999882411915332, "loss": 0.8665, "step": 6130 }, { "epoch": 0.10779683632086237, "grad_norm": 0.09326225773926203, "learning_rate": 0.00019999876917889083, "loss": 0.8674, "step": 6140 }, { "epoch": 0.10797240120086378, "grad_norm": 0.06126560978801633, "learning_rate": 0.00019999871298429495, "loss": 0.8643, "step": 6150 }, { "epoch": 0.10814796608086519, "grad_norm": 0.0797053726819108, "learning_rate": 0.00019999865553536642, "loss": 0.8654, "step": 6160 }, { "epoch": 0.10832353096086658, "grad_norm": 0.08125984243510861, "learning_rate": 0.00019999859683210596, "loss": 0.858, "step": 6170 }, { "epoch": 0.10849909584086799, "grad_norm": 0.08431219112265906, "learning_rate": 0.0001999985368745143, "loss": 0.8698, "step": 6180 }, { "epoch": 0.1086746607208694, "grad_norm": 0.06438939115349626, "learning_rate": 0.0001999984756625922, "loss": 0.8683, "step": 6190 }, { "epoch": 0.1088502256008708, "grad_norm": 0.07449894398507802, "learning_rate": 0.00019999841319634046, "loss": 0.8705, "step": 6200 }, { "epoch": 0.1090257904808722, "grad_norm": 0.08540272979984538, "learning_rate": 0.0001999983494757598, "loss": 0.8647, "step": 6210 }, { "epoch": 0.10920135536087361, "grad_norm": 0.07495797825829166, "learning_rate": 0.00019999828450085113, "loss": 0.868, "step": 6220 }, { "epoch": 0.10937692024087502, "grad_norm": 0.10085252850674534, "learning_rate": 0.0001999982182716152, "loss": 0.8687, "step": 6230 }, { "epoch": 0.10955248512087642, "grad_norm": 0.10049466153529564, "learning_rate": 0.0001999981507880529, "loss": 0.8674, "step": 6240 }, { "epoch": 0.10972805000087782, "grad_norm": 0.08928114376815789, "learning_rate": 0.000199998082050165, "loss": 0.8701, "step": 6250 }, { "epoch": 0.10990361488087923, "grad_norm": 0.07446344602630224, "learning_rate": 0.0001999980120579525, "loss": 0.8691, "step": 6260 }, { "epoch": 0.11007917976088064, "grad_norm": 0.06506682042836333, "learning_rate": 0.00019999794081141615, "loss": 0.8633, "step": 6270 }, { "epoch": 0.11025474464088204, "grad_norm": 0.07356071238125399, "learning_rate": 0.0001999978683105569, "loss": 0.8709, "step": 6280 }, { "epoch": 0.11043030952088344, "grad_norm": 0.08262323934648089, "learning_rate": 0.00019999779455537575, "loss": 0.8636, "step": 6290 }, { "epoch": 0.11060587440088485, "grad_norm": 0.10293518138400064, "learning_rate": 0.00019999771954587356, "loss": 0.869, "step": 6300 }, { "epoch": 0.11078143928088625, "grad_norm": 0.09351369426369147, "learning_rate": 0.0001999976432820513, "loss": 0.8665, "step": 6310 }, { "epoch": 0.11095700416088766, "grad_norm": 0.06767427898168131, "learning_rate": 0.00019999756576390986, "loss": 0.8683, "step": 6320 }, { "epoch": 0.11113256904088906, "grad_norm": 0.08935931793196067, "learning_rate": 0.00019999748699145036, "loss": 0.8712, "step": 6330 }, { "epoch": 0.11130813392089046, "grad_norm": 0.10245129977516058, "learning_rate": 0.0001999974069646737, "loss": 0.873, "step": 6340 }, { "epoch": 0.11148369880089187, "grad_norm": 0.06443295472017836, "learning_rate": 0.00019999732568358092, "loss": 0.8672, "step": 6350 }, { "epoch": 0.11165926368089328, "grad_norm": 0.12901080808613738, "learning_rate": 0.00019999724314817308, "loss": 0.8722, "step": 6360 }, { "epoch": 0.11183482856089468, "grad_norm": 0.08829695413936106, "learning_rate": 0.00019999715935845115, "loss": 0.862, "step": 6370 }, { "epoch": 0.11201039344089608, "grad_norm": 0.12886658976115806, "learning_rate": 0.00019999707431441626, "loss": 0.8644, "step": 6380 }, { "epoch": 0.11218595832089749, "grad_norm": 0.08992976476210034, "learning_rate": 0.00019999698801606946, "loss": 0.8676, "step": 6390 }, { "epoch": 0.11236152320089889, "grad_norm": 0.06288656344847394, "learning_rate": 0.00019999690046341184, "loss": 0.8665, "step": 6400 }, { "epoch": 0.1125370880809003, "grad_norm": 0.10361696689855823, "learning_rate": 0.00019999681165644453, "loss": 0.8634, "step": 6410 }, { "epoch": 0.1127126529609017, "grad_norm": 0.08416559105822072, "learning_rate": 0.00019999672159516866, "loss": 0.8666, "step": 6420 }, { "epoch": 0.11288821784090311, "grad_norm": 0.08755783557908572, "learning_rate": 0.00019999663027958536, "loss": 0.8644, "step": 6430 }, { "epoch": 0.1130637827209045, "grad_norm": 0.07017255913653044, "learning_rate": 0.00019999653770969573, "loss": 0.8648, "step": 6440 }, { "epoch": 0.11323934760090591, "grad_norm": 0.07583121919386751, "learning_rate": 0.00019999644388550103, "loss": 0.8631, "step": 6450 }, { "epoch": 0.11341491248090732, "grad_norm": 0.07455572372353134, "learning_rate": 0.00019999634880700243, "loss": 0.8709, "step": 6460 }, { "epoch": 0.11359047736090873, "grad_norm": 0.06354505214521729, "learning_rate": 0.00019999625247420105, "loss": 0.8624, "step": 6470 }, { "epoch": 0.11376604224091012, "grad_norm": 0.09167279147972723, "learning_rate": 0.00019999615488709827, "loss": 0.8628, "step": 6480 }, { "epoch": 0.11394160712091153, "grad_norm": 0.07530994927582323, "learning_rate": 0.00019999605604569519, "loss": 0.868, "step": 6490 }, { "epoch": 0.11411717200091294, "grad_norm": 0.10898181248596628, "learning_rate": 0.00019999595594999309, "loss": 0.8621, "step": 6500 }, { "epoch": 0.11429273688091435, "grad_norm": 0.06594956241740206, "learning_rate": 0.00019999585459999327, "loss": 0.8649, "step": 6510 }, { "epoch": 0.11446830176091574, "grad_norm": 0.08614647134189739, "learning_rate": 0.00019999575199569703, "loss": 0.8597, "step": 6520 }, { "epoch": 0.11464386664091715, "grad_norm": 0.09465561323595492, "learning_rate": 0.00019999564813710558, "loss": 0.871, "step": 6530 }, { "epoch": 0.11481943152091856, "grad_norm": 0.07353382388727248, "learning_rate": 0.00019999554302422033, "loss": 0.8583, "step": 6540 }, { "epoch": 0.11499499640091997, "grad_norm": 0.07259023633604288, "learning_rate": 0.00019999543665704262, "loss": 0.8648, "step": 6550 }, { "epoch": 0.11517056128092136, "grad_norm": 0.08061027167670781, "learning_rate": 0.00019999532903557367, "loss": 0.8746, "step": 6560 }, { "epoch": 0.11534612616092277, "grad_norm": 0.07897083524581373, "learning_rate": 0.00019999522015981498, "loss": 0.862, "step": 6570 }, { "epoch": 0.11552169104092418, "grad_norm": 0.09370825365000673, "learning_rate": 0.00019999511002976786, "loss": 0.8572, "step": 6580 }, { "epoch": 0.11569725592092558, "grad_norm": 0.11887697661952216, "learning_rate": 0.00019999499864543376, "loss": 0.8621, "step": 6590 }, { "epoch": 0.11587282080092698, "grad_norm": 0.09492930258042202, "learning_rate": 0.00019999488600681403, "loss": 0.8703, "step": 6600 }, { "epoch": 0.11604838568092839, "grad_norm": 0.07337217730463012, "learning_rate": 0.0001999947721139101, "loss": 0.8555, "step": 6610 }, { "epoch": 0.1162239505609298, "grad_norm": 0.08607413425578857, "learning_rate": 0.00019999465696672345, "loss": 0.8722, "step": 6620 }, { "epoch": 0.11639951544093119, "grad_norm": 0.07886298836159036, "learning_rate": 0.00019999454056525553, "loss": 0.8643, "step": 6630 }, { "epoch": 0.1165750803209326, "grad_norm": 0.11743638197999838, "learning_rate": 0.00019999442290950784, "loss": 0.8701, "step": 6640 }, { "epoch": 0.116750645200934, "grad_norm": 0.07461184649095323, "learning_rate": 0.00019999430399948178, "loss": 0.8628, "step": 6650 }, { "epoch": 0.11692621008093541, "grad_norm": 0.07431165118958825, "learning_rate": 0.00019999418383517893, "loss": 0.8672, "step": 6660 }, { "epoch": 0.11710177496093681, "grad_norm": 0.09049083204237948, "learning_rate": 0.0001999940624166008, "loss": 0.8604, "step": 6670 }, { "epoch": 0.11727733984093822, "grad_norm": 0.08053893181504287, "learning_rate": 0.00019999393974374897, "loss": 0.8622, "step": 6680 }, { "epoch": 0.11745290472093962, "grad_norm": 0.06790978019891217, "learning_rate": 0.0001999938158166249, "loss": 0.866, "step": 6690 }, { "epoch": 0.11762846960094103, "grad_norm": 0.06941697690309509, "learning_rate": 0.00019999369063523025, "loss": 0.8688, "step": 6700 }, { "epoch": 0.11780403448094243, "grad_norm": 0.08640293820886544, "learning_rate": 0.00019999356419956655, "loss": 0.8663, "step": 6710 }, { "epoch": 0.11797959936094383, "grad_norm": 0.08173441778188999, "learning_rate": 0.0001999934365096354, "loss": 0.8747, "step": 6720 }, { "epoch": 0.11815516424094524, "grad_norm": 0.08740826126255855, "learning_rate": 0.00019999330756543846, "loss": 0.8609, "step": 6730 }, { "epoch": 0.11833072912094665, "grad_norm": 0.06218179823133969, "learning_rate": 0.00019999317736697737, "loss": 0.8676, "step": 6740 }, { "epoch": 0.11850629400094805, "grad_norm": 0.08341363142816993, "learning_rate": 0.0001999930459142537, "loss": 0.8663, "step": 6750 }, { "epoch": 0.11868185888094945, "grad_norm": 0.08311774202815386, "learning_rate": 0.00019999291320726918, "loss": 0.8609, "step": 6760 }, { "epoch": 0.11885742376095086, "grad_norm": 0.09567146188622888, "learning_rate": 0.00019999277924602547, "loss": 0.8746, "step": 6770 }, { "epoch": 0.11903298864095227, "grad_norm": 0.10158800856109962, "learning_rate": 0.0001999926440305243, "loss": 0.8654, "step": 6780 }, { "epoch": 0.11920855352095366, "grad_norm": 0.07712153256744408, "learning_rate": 0.00019999250756076736, "loss": 0.864, "step": 6790 }, { "epoch": 0.11938411840095507, "grad_norm": 0.060324355022765515, "learning_rate": 0.00019999236983675635, "loss": 0.8637, "step": 6800 }, { "epoch": 0.11955968328095648, "grad_norm": 0.08411255978235048, "learning_rate": 0.00019999223085849307, "loss": 0.8536, "step": 6810 }, { "epoch": 0.11973524816095789, "grad_norm": 0.08102618790704574, "learning_rate": 0.0001999920906259792, "loss": 0.8685, "step": 6820 }, { "epoch": 0.11991081304095928, "grad_norm": 0.08160311119269288, "learning_rate": 0.0001999919491392166, "loss": 0.8595, "step": 6830 }, { "epoch": 0.12008637792096069, "grad_norm": 0.12122808782140802, "learning_rate": 0.00019999180639820708, "loss": 0.8644, "step": 6840 }, { "epoch": 0.1202619428009621, "grad_norm": 0.07563399350941598, "learning_rate": 0.00019999166240295234, "loss": 0.8635, "step": 6850 }, { "epoch": 0.1204375076809635, "grad_norm": 0.08006821286090464, "learning_rate": 0.0001999915171534543, "loss": 0.8587, "step": 6860 }, { "epoch": 0.1206130725609649, "grad_norm": 0.08339756380077658, "learning_rate": 0.00019999137064971475, "loss": 0.8651, "step": 6870 }, { "epoch": 0.12078863744096631, "grad_norm": 0.08399774951682784, "learning_rate": 0.00019999122289173556, "loss": 0.86, "step": 6880 }, { "epoch": 0.12096420232096772, "grad_norm": 0.08516551542864127, "learning_rate": 0.00019999107387951864, "loss": 0.859, "step": 6890 }, { "epoch": 0.12113976720096911, "grad_norm": 0.07829214773855364, "learning_rate": 0.0001999909236130658, "loss": 0.8607, "step": 6900 }, { "epoch": 0.12131533208097052, "grad_norm": 0.07150225604036, "learning_rate": 0.000199990772092379, "loss": 0.8662, "step": 6910 }, { "epoch": 0.12149089696097193, "grad_norm": 0.07477752417774537, "learning_rate": 0.00019999061931746012, "loss": 0.8646, "step": 6920 }, { "epoch": 0.12166646184097334, "grad_norm": 0.11078704328903252, "learning_rate": 0.00019999046528831118, "loss": 0.8629, "step": 6930 }, { "epoch": 0.12184202672097473, "grad_norm": 0.12418489595002523, "learning_rate": 0.000199990310004934, "loss": 0.8559, "step": 6940 }, { "epoch": 0.12201759160097614, "grad_norm": 0.06998456316081583, "learning_rate": 0.0001999901534673307, "loss": 0.8638, "step": 6950 }, { "epoch": 0.12219315648097755, "grad_norm": 0.06729024179566495, "learning_rate": 0.00019998999567550312, "loss": 0.8656, "step": 6960 }, { "epoch": 0.12236872136097895, "grad_norm": 0.07826644010008393, "learning_rate": 0.00019998983662945338, "loss": 0.8691, "step": 6970 }, { "epoch": 0.12254428624098035, "grad_norm": 0.09028200218501865, "learning_rate": 0.00019998967632918338, "loss": 0.8646, "step": 6980 }, { "epoch": 0.12271985112098176, "grad_norm": 0.06819629080257669, "learning_rate": 0.00019998951477469528, "loss": 0.8699, "step": 6990 }, { "epoch": 0.12289541600098317, "grad_norm": 0.09800816698358492, "learning_rate": 0.000199989351965991, "loss": 0.8551, "step": 7000 }, { "epoch": 0.12307098088098457, "grad_norm": 0.07584536567529526, "learning_rate": 0.00019998918790307266, "loss": 0.8674, "step": 7010 }, { "epoch": 0.12324654576098597, "grad_norm": 0.07651475306048983, "learning_rate": 0.00019998902258594234, "loss": 0.8651, "step": 7020 }, { "epoch": 0.12342211064098738, "grad_norm": 0.07451610141984215, "learning_rate": 0.00019998885601460215, "loss": 0.8679, "step": 7030 }, { "epoch": 0.12359767552098878, "grad_norm": 0.06829257767750715, "learning_rate": 0.00019998868818905417, "loss": 0.8603, "step": 7040 }, { "epoch": 0.12377324040099019, "grad_norm": 0.11661286758607511, "learning_rate": 0.00019998851910930056, "loss": 0.858, "step": 7050 }, { "epoch": 0.12394880528099159, "grad_norm": 0.07533027381527405, "learning_rate": 0.00019998834877534341, "loss": 0.8664, "step": 7060 }, { "epoch": 0.124124370160993, "grad_norm": 0.08518685375615481, "learning_rate": 0.00019998817718718497, "loss": 0.8674, "step": 7070 }, { "epoch": 0.1242999350409944, "grad_norm": 0.11718953267785284, "learning_rate": 0.0001999880043448273, "loss": 0.8597, "step": 7080 }, { "epoch": 0.12447549992099581, "grad_norm": 0.06041968996360231, "learning_rate": 0.00019998783024827266, "loss": 0.8591, "step": 7090 }, { "epoch": 0.1246510648009972, "grad_norm": 0.08895705776728655, "learning_rate": 0.0001999876548975232, "loss": 0.8688, "step": 7100 }, { "epoch": 0.12482662968099861, "grad_norm": 0.09209232934596556, "learning_rate": 0.0001999874782925812, "loss": 0.8631, "step": 7110 }, { "epoch": 0.125002194561, "grad_norm": 0.073680188021475, "learning_rate": 0.00019998730043344895, "loss": 0.8606, "step": 7120 }, { "epoch": 0.12517775944100143, "grad_norm": 0.06970140115454933, "learning_rate": 0.00019998712132012858, "loss": 0.8626, "step": 7130 }, { "epoch": 0.12535332432100282, "grad_norm": 0.07195509838980164, "learning_rate": 0.00019998694095262238, "loss": 0.866, "step": 7140 }, { "epoch": 0.12552888920100422, "grad_norm": 0.09359695167686342, "learning_rate": 0.00019998675933093272, "loss": 0.8622, "step": 7150 }, { "epoch": 0.12570445408100564, "grad_norm": 0.0945085929979948, "learning_rate": 0.00019998657645506183, "loss": 0.8602, "step": 7160 }, { "epoch": 0.12588001896100703, "grad_norm": 0.10430049127986885, "learning_rate": 0.00019998639232501203, "loss": 0.8669, "step": 7170 }, { "epoch": 0.12605558384100846, "grad_norm": 0.06948410625736152, "learning_rate": 0.0001999862069407857, "loss": 0.8607, "step": 7180 }, { "epoch": 0.12623114872100985, "grad_norm": 0.06202101545318056, "learning_rate": 0.00019998602030238513, "loss": 0.8685, "step": 7190 }, { "epoch": 0.12640671360101124, "grad_norm": 0.08321540080321779, "learning_rate": 0.00019998583240981273, "loss": 0.8628, "step": 7200 }, { "epoch": 0.12658227848101267, "grad_norm": 0.12479142934717083, "learning_rate": 0.00019998564326307087, "loss": 0.8648, "step": 7210 }, { "epoch": 0.12675784336101406, "grad_norm": 0.0678888725635754, "learning_rate": 0.0001999854528621619, "loss": 0.8617, "step": 7220 }, { "epoch": 0.12693340824101546, "grad_norm": 0.06172599705488479, "learning_rate": 0.0001999852612070883, "loss": 0.866, "step": 7230 }, { "epoch": 0.12710897312101688, "grad_norm": 0.07005113820190416, "learning_rate": 0.0001999850682978525, "loss": 0.8659, "step": 7240 }, { "epoch": 0.12728453800101827, "grad_norm": 0.07979955380921058, "learning_rate": 0.00019998487413445685, "loss": 0.8588, "step": 7250 }, { "epoch": 0.1274601028810197, "grad_norm": 0.06202932181058022, "learning_rate": 0.0001999846787169039, "loss": 0.8717, "step": 7260 }, { "epoch": 0.1276356677610211, "grad_norm": 0.07786650945228414, "learning_rate": 0.00019998448204519613, "loss": 0.8548, "step": 7270 }, { "epoch": 0.12781123264102248, "grad_norm": 0.09704058046716232, "learning_rate": 0.00019998428411933597, "loss": 0.8667, "step": 7280 }, { "epoch": 0.1279867975210239, "grad_norm": 0.13141578953770494, "learning_rate": 0.00019998408493932595, "loss": 0.8643, "step": 7290 }, { "epoch": 0.1281623624010253, "grad_norm": 0.09946917261695024, "learning_rate": 0.00019998388450516864, "loss": 0.86, "step": 7300 }, { "epoch": 0.1283379272810267, "grad_norm": 0.06468876121153408, "learning_rate": 0.00019998368281686648, "loss": 0.8663, "step": 7310 }, { "epoch": 0.12851349216102811, "grad_norm": 0.08299448610774098, "learning_rate": 0.00019998347987442217, "loss": 0.87, "step": 7320 }, { "epoch": 0.1286890570410295, "grad_norm": 0.06991833842567641, "learning_rate": 0.00019998327567783816, "loss": 0.8663, "step": 7330 }, { "epoch": 0.12886462192103093, "grad_norm": 0.06438973276071469, "learning_rate": 0.00019998307022711708, "loss": 0.8647, "step": 7340 }, { "epoch": 0.12904018680103232, "grad_norm": 0.07961236381273255, "learning_rate": 0.0001999828635222615, "loss": 0.864, "step": 7350 }, { "epoch": 0.12921575168103372, "grad_norm": 0.07903442970479649, "learning_rate": 0.0001999826555632741, "loss": 0.8683, "step": 7360 }, { "epoch": 0.12939131656103514, "grad_norm": 0.09440078614171286, "learning_rate": 0.00019998244635015752, "loss": 0.8723, "step": 7370 }, { "epoch": 0.12956688144103654, "grad_norm": 0.07896254632174128, "learning_rate": 0.00019998223588291433, "loss": 0.8684, "step": 7380 }, { "epoch": 0.12974244632103793, "grad_norm": 0.10629569772396467, "learning_rate": 0.00019998202416154724, "loss": 0.8642, "step": 7390 }, { "epoch": 0.12991801120103935, "grad_norm": 0.08337568127340755, "learning_rate": 0.00019998181118605893, "loss": 0.8572, "step": 7400 }, { "epoch": 0.13009357608104075, "grad_norm": 0.12933104531436018, "learning_rate": 0.00019998159695645213, "loss": 0.8587, "step": 7410 }, { "epoch": 0.13026914096104214, "grad_norm": 0.0624210331651339, "learning_rate": 0.00019998138147272953, "loss": 0.8695, "step": 7420 }, { "epoch": 0.13044470584104356, "grad_norm": 0.06198528454553228, "learning_rate": 0.00019998116473489384, "loss": 0.8699, "step": 7430 }, { "epoch": 0.13062027072104496, "grad_norm": 0.08986008441312672, "learning_rate": 0.00019998094674294778, "loss": 0.8563, "step": 7440 }, { "epoch": 0.13079583560104638, "grad_norm": 0.09576007207439642, "learning_rate": 0.0001999807274968942, "loss": 0.8641, "step": 7450 }, { "epoch": 0.13097140048104777, "grad_norm": 0.07811431741574616, "learning_rate": 0.00019998050699673585, "loss": 0.8722, "step": 7460 }, { "epoch": 0.13114696536104917, "grad_norm": 0.07436090882982085, "learning_rate": 0.0001999802852424755, "loss": 0.8633, "step": 7470 }, { "epoch": 0.1313225302410506, "grad_norm": 0.06900096948287676, "learning_rate": 0.00019998006223411594, "loss": 0.8618, "step": 7480 }, { "epoch": 0.13149809512105198, "grad_norm": 0.06991603081005608, "learning_rate": 0.00019997983797166003, "loss": 0.8622, "step": 7490 }, { "epoch": 0.13167366000105338, "grad_norm": 0.08336308943038788, "learning_rate": 0.0001999796124551106, "loss": 0.8614, "step": 7500 }, { "epoch": 0.1318492248810548, "grad_norm": 0.05857836997335782, "learning_rate": 0.00019997938568447053, "loss": 0.8696, "step": 7510 }, { "epoch": 0.1320247897610562, "grad_norm": 0.061331575939083324, "learning_rate": 0.00019997915765974263, "loss": 0.8627, "step": 7520 }, { "epoch": 0.13220035464105762, "grad_norm": 0.12452096382272163, "learning_rate": 0.0001999789283809299, "loss": 0.8601, "step": 7530 }, { "epoch": 0.132375919521059, "grad_norm": 0.07378478604646697, "learning_rate": 0.00019997869784803515, "loss": 0.8678, "step": 7540 }, { "epoch": 0.1325514844010604, "grad_norm": 0.08915917243276575, "learning_rate": 0.00019997846606106134, "loss": 0.8671, "step": 7550 }, { "epoch": 0.13272704928106183, "grad_norm": 0.06964537694834418, "learning_rate": 0.00019997823302001134, "loss": 0.8645, "step": 7560 }, { "epoch": 0.13290261416106322, "grad_norm": 0.06887584061236975, "learning_rate": 0.00019997799872488822, "loss": 0.8681, "step": 7570 }, { "epoch": 0.13307817904106461, "grad_norm": 0.07338452277336925, "learning_rate": 0.00019997776317569487, "loss": 0.8669, "step": 7580 }, { "epoch": 0.13325374392106604, "grad_norm": 0.07527237758228512, "learning_rate": 0.00019997752637243427, "loss": 0.8663, "step": 7590 }, { "epoch": 0.13342930880106743, "grad_norm": 0.07122887723854143, "learning_rate": 0.0001999772883151095, "loss": 0.8628, "step": 7600 }, { "epoch": 0.13360487368106883, "grad_norm": 0.08050908182130337, "learning_rate": 0.00019997704900372347, "loss": 0.8676, "step": 7610 }, { "epoch": 0.13378043856107025, "grad_norm": 0.06773373887509727, "learning_rate": 0.00019997680843827925, "loss": 0.8644, "step": 7620 }, { "epoch": 0.13395600344107164, "grad_norm": 0.07056489320852093, "learning_rate": 0.00019997656661877993, "loss": 0.8664, "step": 7630 }, { "epoch": 0.13413156832107306, "grad_norm": 0.08452788695089015, "learning_rate": 0.00019997632354522852, "loss": 0.8685, "step": 7640 }, { "epoch": 0.13430713320107446, "grad_norm": 0.07556254172026002, "learning_rate": 0.00019997607921762813, "loss": 0.8666, "step": 7650 }, { "epoch": 0.13448269808107585, "grad_norm": 0.0736956694792162, "learning_rate": 0.00019997583363598186, "loss": 0.8741, "step": 7660 }, { "epoch": 0.13465826296107727, "grad_norm": 0.09490239388328225, "learning_rate": 0.0001999755868002928, "loss": 0.8636, "step": 7670 }, { "epoch": 0.13483382784107867, "grad_norm": 0.07655251831927491, "learning_rate": 0.00019997533871056407, "loss": 0.8697, "step": 7680 }, { "epoch": 0.13500939272108006, "grad_norm": 0.10343269470152557, "learning_rate": 0.00019997508936679884, "loss": 0.8612, "step": 7690 }, { "epoch": 0.13518495760108148, "grad_norm": 0.06448586819635803, "learning_rate": 0.00019997483876900028, "loss": 0.8672, "step": 7700 }, { "epoch": 0.13536052248108288, "grad_norm": 0.07929609900453134, "learning_rate": 0.0001999745869171715, "loss": 0.8705, "step": 7710 }, { "epoch": 0.1355360873610843, "grad_norm": 0.07542779050514448, "learning_rate": 0.00019997433381131578, "loss": 0.862, "step": 7720 }, { "epoch": 0.1357116522410857, "grad_norm": 0.07390768465646513, "learning_rate": 0.00019997407945143627, "loss": 0.8683, "step": 7730 }, { "epoch": 0.1358872171210871, "grad_norm": 0.06234469323883626, "learning_rate": 0.0001999738238375362, "loss": 0.8698, "step": 7740 }, { "epoch": 0.1360627820010885, "grad_norm": 0.068467463857245, "learning_rate": 0.00019997356696961881, "loss": 0.8559, "step": 7750 }, { "epoch": 0.1362383468810899, "grad_norm": 0.08492980228703197, "learning_rate": 0.00019997330884768738, "loss": 0.8625, "step": 7760 }, { "epoch": 0.1364139117610913, "grad_norm": 0.06207522791229603, "learning_rate": 0.00019997304947174513, "loss": 0.8551, "step": 7770 }, { "epoch": 0.13658947664109272, "grad_norm": 0.0725808457815244, "learning_rate": 0.00019997278884179538, "loss": 0.861, "step": 7780 }, { "epoch": 0.13676504152109412, "grad_norm": 0.08033819820833364, "learning_rate": 0.00019997252695784143, "loss": 0.8689, "step": 7790 }, { "epoch": 0.13694060640109554, "grad_norm": 0.08982211061950059, "learning_rate": 0.00019997226381988662, "loss": 0.871, "step": 7800 }, { "epoch": 0.13711617128109693, "grad_norm": 0.07131737353291, "learning_rate": 0.0001999719994279342, "loss": 0.8598, "step": 7810 }, { "epoch": 0.13729173616109833, "grad_norm": 0.08004395486306835, "learning_rate": 0.00019997173378198766, "loss": 0.8571, "step": 7820 }, { "epoch": 0.13746730104109975, "grad_norm": 0.07305514861101732, "learning_rate": 0.0001999714668820502, "loss": 0.858, "step": 7830 }, { "epoch": 0.13764286592110114, "grad_norm": 0.05685932586683221, "learning_rate": 0.00019997119872812536, "loss": 0.8657, "step": 7840 }, { "epoch": 0.13781843080110254, "grad_norm": 0.06504601064166171, "learning_rate": 0.00019997092932021643, "loss": 0.8637, "step": 7850 }, { "epoch": 0.13799399568110396, "grad_norm": 0.06000046499294888, "learning_rate": 0.00019997065865832685, "loss": 0.8707, "step": 7860 }, { "epoch": 0.13816956056110535, "grad_norm": 0.07310407555002096, "learning_rate": 0.0001999703867424601, "loss": 0.8655, "step": 7870 }, { "epoch": 0.13834512544110675, "grad_norm": 0.07279701460333302, "learning_rate": 0.00019997011357261954, "loss": 0.8607, "step": 7880 }, { "epoch": 0.13852069032110817, "grad_norm": 0.05683728055548074, "learning_rate": 0.00019996983914880866, "loss": 0.8676, "step": 7890 }, { "epoch": 0.13869625520110956, "grad_norm": 0.0694588047563135, "learning_rate": 0.000199969563471031, "loss": 0.8532, "step": 7900 }, { "epoch": 0.13887182008111099, "grad_norm": 0.06397035234155944, "learning_rate": 0.00019996928653928997, "loss": 0.8636, "step": 7910 }, { "epoch": 0.13904738496111238, "grad_norm": 0.0593671427715556, "learning_rate": 0.00019996900835358915, "loss": 0.8663, "step": 7920 }, { "epoch": 0.13922294984111377, "grad_norm": 0.06991832355467917, "learning_rate": 0.000199968728913932, "loss": 0.8663, "step": 7930 }, { "epoch": 0.1393985147211152, "grad_norm": 0.08800082443723872, "learning_rate": 0.00019996844822032207, "loss": 0.8659, "step": 7940 }, { "epoch": 0.1395740796011166, "grad_norm": 0.06615879372452065, "learning_rate": 0.00019996816627276294, "loss": 0.8728, "step": 7950 }, { "epoch": 0.13974964448111798, "grad_norm": 0.074350658551748, "learning_rate": 0.00019996788307125819, "loss": 0.8609, "step": 7960 }, { "epoch": 0.1399252093611194, "grad_norm": 0.09286392327233237, "learning_rate": 0.00019996759861581138, "loss": 0.8643, "step": 7970 }, { "epoch": 0.1401007742411208, "grad_norm": 0.08870220132813415, "learning_rate": 0.00019996731290642614, "loss": 0.8627, "step": 7980 }, { "epoch": 0.14027633912112222, "grad_norm": 0.08546161863448751, "learning_rate": 0.00019996702594310607, "loss": 0.8751, "step": 7990 }, { "epoch": 0.14045190400112362, "grad_norm": 0.08198455410242135, "learning_rate": 0.00019996673772585484, "loss": 0.8625, "step": 8000 }, { "epoch": 0.140627468881125, "grad_norm": 0.10064043024920288, "learning_rate": 0.00019996644825467604, "loss": 0.8693, "step": 8010 }, { "epoch": 0.14080303376112643, "grad_norm": 0.06109620034871559, "learning_rate": 0.0001999661575295734, "loss": 0.8574, "step": 8020 }, { "epoch": 0.14097859864112783, "grad_norm": 0.0799266894829027, "learning_rate": 0.00019996586555055056, "loss": 0.8604, "step": 8030 }, { "epoch": 0.14115416352112922, "grad_norm": 0.068758539594058, "learning_rate": 0.00019996557231761127, "loss": 0.8668, "step": 8040 }, { "epoch": 0.14132972840113064, "grad_norm": 0.08429222535924202, "learning_rate": 0.00019996527783075919, "loss": 0.8634, "step": 8050 }, { "epoch": 0.14150529328113204, "grad_norm": 0.06745855639969878, "learning_rate": 0.00019996498208999804, "loss": 0.8711, "step": 8060 }, { "epoch": 0.14168085816113346, "grad_norm": 0.06647319369616542, "learning_rate": 0.00019996468509533163, "loss": 0.8694, "step": 8070 }, { "epoch": 0.14185642304113485, "grad_norm": 0.07391600089092751, "learning_rate": 0.0001999643868467637, "loss": 0.8646, "step": 8080 }, { "epoch": 0.14203198792113625, "grad_norm": 0.061424303117080026, "learning_rate": 0.000199964087344298, "loss": 0.8533, "step": 8090 }, { "epoch": 0.14220755280113767, "grad_norm": 0.07127557212068945, "learning_rate": 0.0001999637865879384, "loss": 0.8651, "step": 8100 }, { "epoch": 0.14238311768113907, "grad_norm": 0.07793301121588155, "learning_rate": 0.0001999634845776886, "loss": 0.864, "step": 8110 }, { "epoch": 0.14255868256114046, "grad_norm": 0.07168635736295148, "learning_rate": 0.00019996318131355252, "loss": 0.8574, "step": 8120 }, { "epoch": 0.14273424744114188, "grad_norm": 0.08339932699061672, "learning_rate": 0.00019996287679553394, "loss": 0.8597, "step": 8130 }, { "epoch": 0.14290981232114328, "grad_norm": 0.08332609478405832, "learning_rate": 0.00019996257102363675, "loss": 0.8637, "step": 8140 }, { "epoch": 0.14308537720114467, "grad_norm": 0.07660408820691532, "learning_rate": 0.00019996226399786484, "loss": 0.8623, "step": 8150 }, { "epoch": 0.1432609420811461, "grad_norm": 0.05308178861678294, "learning_rate": 0.00019996195571822206, "loss": 0.8749, "step": 8160 }, { "epoch": 0.1434365069611475, "grad_norm": 0.10094193393812588, "learning_rate": 0.00019996164618471234, "loss": 0.8578, "step": 8170 }, { "epoch": 0.1436120718411489, "grad_norm": 0.07224715398841358, "learning_rate": 0.00019996133539733959, "loss": 0.8718, "step": 8180 }, { "epoch": 0.1437876367211503, "grad_norm": 0.06172443073945614, "learning_rate": 0.00019996102335610778, "loss": 0.8681, "step": 8190 }, { "epoch": 0.1439632016011517, "grad_norm": 0.07628329806305256, "learning_rate": 0.0001999607100610208, "loss": 0.8692, "step": 8200 }, { "epoch": 0.14413876648115312, "grad_norm": 0.07257495093918971, "learning_rate": 0.0001999603955120827, "loss": 0.8706, "step": 8210 }, { "epoch": 0.1443143313611545, "grad_norm": 0.08652328706797828, "learning_rate": 0.0001999600797092974, "loss": 0.8612, "step": 8220 }, { "epoch": 0.1444898962411559, "grad_norm": 0.05985572657647732, "learning_rate": 0.0001999597626526689, "loss": 0.8724, "step": 8230 }, { "epoch": 0.14466546112115733, "grad_norm": 0.06907438585620056, "learning_rate": 0.00019995944434220126, "loss": 0.8658, "step": 8240 }, { "epoch": 0.14484102600115872, "grad_norm": 0.08038741236179113, "learning_rate": 0.0001999591247778985, "loss": 0.863, "step": 8250 }, { "epoch": 0.14501659088116015, "grad_norm": 0.09823924023835057, "learning_rate": 0.00019995880395976467, "loss": 0.8656, "step": 8260 }, { "epoch": 0.14519215576116154, "grad_norm": 0.063127811496834, "learning_rate": 0.00019995848188780383, "loss": 0.866, "step": 8270 }, { "epoch": 0.14536772064116293, "grad_norm": 0.09736354394589922, "learning_rate": 0.00019995815856202007, "loss": 0.8669, "step": 8280 }, { "epoch": 0.14554328552116436, "grad_norm": 0.09841097723260638, "learning_rate": 0.00019995783398241745, "loss": 0.8672, "step": 8290 }, { "epoch": 0.14571885040116575, "grad_norm": 0.07283386116763972, "learning_rate": 0.00019995750814900013, "loss": 0.8663, "step": 8300 }, { "epoch": 0.14589441528116714, "grad_norm": 0.0775444840781556, "learning_rate": 0.00019995718106177217, "loss": 0.8675, "step": 8310 }, { "epoch": 0.14606998016116857, "grad_norm": 0.08668953506115076, "learning_rate": 0.00019995685272073782, "loss": 0.859, "step": 8320 }, { "epoch": 0.14624554504116996, "grad_norm": 0.06396927920741455, "learning_rate": 0.00019995652312590115, "loss": 0.8659, "step": 8330 }, { "epoch": 0.14642110992117136, "grad_norm": 0.1030289024513569, "learning_rate": 0.0001999561922772664, "loss": 0.8741, "step": 8340 }, { "epoch": 0.14659667480117278, "grad_norm": 0.05465013085212943, "learning_rate": 0.00019995586017483767, "loss": 0.8564, "step": 8350 }, { "epoch": 0.14677223968117417, "grad_norm": 0.06074197273688347, "learning_rate": 0.00019995552681861925, "loss": 0.8682, "step": 8360 }, { "epoch": 0.1469478045611756, "grad_norm": 0.07312150928547612, "learning_rate": 0.00019995519220861535, "loss": 0.8619, "step": 8370 }, { "epoch": 0.147123369441177, "grad_norm": 0.07386732434699313, "learning_rate": 0.0001999548563448302, "loss": 0.8686, "step": 8380 }, { "epoch": 0.14729893432117838, "grad_norm": 0.07361871239458914, "learning_rate": 0.00019995451922726808, "loss": 0.8562, "step": 8390 }, { "epoch": 0.1474744992011798, "grad_norm": 0.06424309210528296, "learning_rate": 0.0001999541808559332, "loss": 0.8624, "step": 8400 }, { "epoch": 0.1476500640811812, "grad_norm": 0.0870024512994654, "learning_rate": 0.00019995384123082987, "loss": 0.8653, "step": 8410 }, { "epoch": 0.1478256289611826, "grad_norm": 0.09087337621831493, "learning_rate": 0.00019995350035196245, "loss": 0.8713, "step": 8420 }, { "epoch": 0.14800119384118401, "grad_norm": 0.07883004117975567, "learning_rate": 0.0001999531582193352, "loss": 0.8727, "step": 8430 }, { "epoch": 0.1481767587211854, "grad_norm": 0.09168322270945552, "learning_rate": 0.00019995281483295248, "loss": 0.8593, "step": 8440 }, { "epoch": 0.14835232360118683, "grad_norm": 0.0732946541279441, "learning_rate": 0.0001999524701928186, "loss": 0.862, "step": 8450 }, { "epoch": 0.14852788848118822, "grad_norm": 0.0550971982818378, "learning_rate": 0.000199952124298938, "loss": 0.8595, "step": 8460 }, { "epoch": 0.14870345336118962, "grad_norm": 0.06850952990859187, "learning_rate": 0.00019995177715131498, "loss": 0.8646, "step": 8470 }, { "epoch": 0.14887901824119104, "grad_norm": 0.07494472979392426, "learning_rate": 0.000199951428749954, "loss": 0.8605, "step": 8480 }, { "epoch": 0.14905458312119244, "grad_norm": 0.07434027217336076, "learning_rate": 0.00019995107909485948, "loss": 0.8644, "step": 8490 }, { "epoch": 0.14923014800119383, "grad_norm": 0.06681746802354996, "learning_rate": 0.00019995072818603578, "loss": 0.8643, "step": 8500 }, { "epoch": 0.14940571288119525, "grad_norm": 0.06917371879590063, "learning_rate": 0.00019995037602348742, "loss": 0.8642, "step": 8510 }, { "epoch": 0.14958127776119665, "grad_norm": 0.06394503379057809, "learning_rate": 0.0001999500226072188, "loss": 0.8573, "step": 8520 }, { "epoch": 0.14975684264119807, "grad_norm": 0.11756490259832941, "learning_rate": 0.00019994966793723446, "loss": 0.8662, "step": 8530 }, { "epoch": 0.14993240752119946, "grad_norm": 0.06736807555176572, "learning_rate": 0.00019994931201353883, "loss": 0.863, "step": 8540 }, { "epoch": 0.15010797240120086, "grad_norm": 0.11001681610355439, "learning_rate": 0.0001999489548361365, "loss": 0.8583, "step": 8550 }, { "epoch": 0.15028353728120228, "grad_norm": 0.08136238789073882, "learning_rate": 0.00019994859640503195, "loss": 0.8613, "step": 8560 }, { "epoch": 0.15045910216120367, "grad_norm": 0.0663069571743909, "learning_rate": 0.00019994823672022966, "loss": 0.8548, "step": 8570 }, { "epoch": 0.15063466704120507, "grad_norm": 0.07560663833352216, "learning_rate": 0.0001999478757817343, "loss": 0.8635, "step": 8580 }, { "epoch": 0.1508102319212065, "grad_norm": 0.06629747536370145, "learning_rate": 0.00019994751358955038, "loss": 0.8564, "step": 8590 }, { "epoch": 0.15098579680120788, "grad_norm": 0.061888001302653865, "learning_rate": 0.0001999471501436825, "loss": 0.8674, "step": 8600 }, { "epoch": 0.15116136168120928, "grad_norm": 0.09693073004753053, "learning_rate": 0.00019994678544413524, "loss": 0.8661, "step": 8610 }, { "epoch": 0.1513369265612107, "grad_norm": 0.06495360513609198, "learning_rate": 0.00019994641949091325, "loss": 0.8679, "step": 8620 }, { "epoch": 0.1515124914412121, "grad_norm": 0.08274464856364237, "learning_rate": 0.0001999460522840212, "loss": 0.8548, "step": 8630 }, { "epoch": 0.15168805632121352, "grad_norm": 0.08473108045655109, "learning_rate": 0.00019994568382346366, "loss": 0.8665, "step": 8640 }, { "epoch": 0.1518636212012149, "grad_norm": 0.05413280797841906, "learning_rate": 0.00019994531410924538, "loss": 0.8677, "step": 8650 }, { "epoch": 0.1520391860812163, "grad_norm": 0.09989397755513212, "learning_rate": 0.00019994494314137099, "loss": 0.8703, "step": 8660 }, { "epoch": 0.15221475096121773, "grad_norm": 0.06426253484216714, "learning_rate": 0.0001999445709198452, "loss": 0.8681, "step": 8670 }, { "epoch": 0.15239031584121912, "grad_norm": 0.08099422659628848, "learning_rate": 0.00019994419744467275, "loss": 0.8562, "step": 8680 }, { "epoch": 0.15256588072122051, "grad_norm": 0.08350950293638333, "learning_rate": 0.00019994382271585834, "loss": 0.8669, "step": 8690 }, { "epoch": 0.15274144560122194, "grad_norm": 0.08598099726378816, "learning_rate": 0.00019994344673340673, "loss": 0.859, "step": 8700 }, { "epoch": 0.15291701048122333, "grad_norm": 0.09080714439127639, "learning_rate": 0.0001999430694973227, "loss": 0.857, "step": 8710 }, { "epoch": 0.15309257536122475, "grad_norm": 0.09750691173403307, "learning_rate": 0.00019994269100761104, "loss": 0.8621, "step": 8720 }, { "epoch": 0.15326814024122615, "grad_norm": 0.09314856329411601, "learning_rate": 0.00019994231126427647, "loss": 0.8628, "step": 8730 }, { "epoch": 0.15344370512122754, "grad_norm": 0.0717703883288334, "learning_rate": 0.0001999419302673239, "loss": 0.8663, "step": 8740 }, { "epoch": 0.15361927000122896, "grad_norm": 0.07606708593332649, "learning_rate": 0.0001999415480167581, "loss": 0.8658, "step": 8750 }, { "epoch": 0.15379483488123036, "grad_norm": 0.0736511239698316, "learning_rate": 0.00019994116451258392, "loss": 0.8617, "step": 8760 }, { "epoch": 0.15397039976123175, "grad_norm": 0.06877581202991036, "learning_rate": 0.00019994077975480623, "loss": 0.8655, "step": 8770 }, { "epoch": 0.15414596464123317, "grad_norm": 0.08320934391222186, "learning_rate": 0.00019994039374342988, "loss": 0.8679, "step": 8780 }, { "epoch": 0.15432152952123457, "grad_norm": 0.09036133764983303, "learning_rate": 0.0001999400064784598, "loss": 0.8589, "step": 8790 }, { "epoch": 0.154497094401236, "grad_norm": 0.07083316708295039, "learning_rate": 0.00019993961795990086, "loss": 0.8633, "step": 8800 }, { "epoch": 0.15467265928123738, "grad_norm": 0.08465051474396952, "learning_rate": 0.00019993922818775802, "loss": 0.868, "step": 8810 }, { "epoch": 0.15484822416123878, "grad_norm": 0.08756670025858677, "learning_rate": 0.00019993883716203615, "loss": 0.8688, "step": 8820 }, { "epoch": 0.1550237890412402, "grad_norm": 0.05243013108793366, "learning_rate": 0.00019993844488274033, "loss": 0.862, "step": 8830 }, { "epoch": 0.1551993539212416, "grad_norm": 0.0725426233713026, "learning_rate": 0.00019993805134987537, "loss": 0.8609, "step": 8840 }, { "epoch": 0.155374918801243, "grad_norm": 0.07174115797642591, "learning_rate": 0.00019993765656344637, "loss": 0.865, "step": 8850 }, { "epoch": 0.1555504836812444, "grad_norm": 0.07427652010108483, "learning_rate": 0.00019993726052345832, "loss": 0.8645, "step": 8860 }, { "epoch": 0.1557260485612458, "grad_norm": 0.0661783330510821, "learning_rate": 0.00019993686322991618, "loss": 0.86, "step": 8870 }, { "epoch": 0.1559016134412472, "grad_norm": 0.06873827381797169, "learning_rate": 0.00019993646468282505, "loss": 0.8723, "step": 8880 }, { "epoch": 0.15607717832124862, "grad_norm": 0.08253006624171969, "learning_rate": 0.00019993606488218996, "loss": 0.8651, "step": 8890 }, { "epoch": 0.15625274320125002, "grad_norm": 0.06705395453343976, "learning_rate": 0.00019993566382801593, "loss": 0.8673, "step": 8900 }, { "epoch": 0.15642830808125144, "grad_norm": 0.0717721721666629, "learning_rate": 0.00019993526152030814, "loss": 0.8605, "step": 8910 }, { "epoch": 0.15660387296125283, "grad_norm": 0.09128937045023384, "learning_rate": 0.00019993485795907157, "loss": 0.8603, "step": 8920 }, { "epoch": 0.15677943784125423, "grad_norm": 0.06689444128726653, "learning_rate": 0.00019993445314431138, "loss": 0.86, "step": 8930 }, { "epoch": 0.15695500272125565, "grad_norm": 0.10546463248930352, "learning_rate": 0.00019993404707603274, "loss": 0.8608, "step": 8940 }, { "epoch": 0.15713056760125704, "grad_norm": 0.10985246939155664, "learning_rate": 0.00019993363975424078, "loss": 0.8559, "step": 8950 }, { "epoch": 0.15730613248125844, "grad_norm": 0.06452299563979243, "learning_rate": 0.0001999332311789406, "loss": 0.8574, "step": 8960 }, { "epoch": 0.15748169736125986, "grad_norm": 0.08962700170012455, "learning_rate": 0.00019993282135013745, "loss": 0.8516, "step": 8970 }, { "epoch": 0.15765726224126125, "grad_norm": 0.0640773885341476, "learning_rate": 0.0001999324102678365, "loss": 0.8554, "step": 8980 }, { "epoch": 0.15783282712126268, "grad_norm": 0.06504273381677973, "learning_rate": 0.00019993199793204294, "loss": 0.8572, "step": 8990 }, { "epoch": 0.15800839200126407, "grad_norm": 0.06773147516132705, "learning_rate": 0.00019993158434276198, "loss": 0.8636, "step": 9000 }, { "epoch": 0.15818395688126546, "grad_norm": 0.08432560892962908, "learning_rate": 0.00019993116949999892, "loss": 0.8695, "step": 9010 }, { "epoch": 0.15835952176126689, "grad_norm": 0.08843251950234295, "learning_rate": 0.00019993075340375894, "loss": 0.8667, "step": 9020 }, { "epoch": 0.15853508664126828, "grad_norm": 0.06882795747876028, "learning_rate": 0.00019993033605404737, "loss": 0.8695, "step": 9030 }, { "epoch": 0.15871065152126967, "grad_norm": 0.07453910741175059, "learning_rate": 0.00019992991745086954, "loss": 0.8645, "step": 9040 }, { "epoch": 0.1588862164012711, "grad_norm": 0.09526012524279467, "learning_rate": 0.00019992949759423066, "loss": 0.8593, "step": 9050 }, { "epoch": 0.1590617812812725, "grad_norm": 0.059080892730582174, "learning_rate": 0.00019992907648413603, "loss": 0.8635, "step": 9060 }, { "epoch": 0.15923734616127388, "grad_norm": 0.06289406373253201, "learning_rate": 0.00019992865412059112, "loss": 0.8654, "step": 9070 }, { "epoch": 0.1594129110412753, "grad_norm": 0.06531544699727798, "learning_rate": 0.00019992823050360117, "loss": 0.8632, "step": 9080 }, { "epoch": 0.1595884759212767, "grad_norm": 0.06129311560827095, "learning_rate": 0.00019992780563317158, "loss": 0.8595, "step": 9090 }, { "epoch": 0.15976404080127812, "grad_norm": 0.06118040644326025, "learning_rate": 0.00019992737950930774, "loss": 0.8611, "step": 9100 }, { "epoch": 0.15993960568127952, "grad_norm": 0.07307102016211205, "learning_rate": 0.00019992695213201503, "loss": 0.8735, "step": 9110 }, { "epoch": 0.1601151705612809, "grad_norm": 0.07474999947349535, "learning_rate": 0.0001999265235012989, "loss": 0.8592, "step": 9120 }, { "epoch": 0.16029073544128233, "grad_norm": 0.07473614119369282, "learning_rate": 0.00019992609361716473, "loss": 0.8637, "step": 9130 }, { "epoch": 0.16046630032128373, "grad_norm": 0.0694456417698654, "learning_rate": 0.000199925662479618, "loss": 0.8621, "step": 9140 }, { "epoch": 0.16064186520128512, "grad_norm": 0.07357894530195655, "learning_rate": 0.0001999252300886642, "loss": 0.8648, "step": 9150 }, { "epoch": 0.16081743008128654, "grad_norm": 0.0974038344717866, "learning_rate": 0.00019992479644430873, "loss": 0.8685, "step": 9160 }, { "epoch": 0.16099299496128794, "grad_norm": 0.07166206789135984, "learning_rate": 0.00019992436154655713, "loss": 0.8611, "step": 9170 }, { "epoch": 0.16116855984128936, "grad_norm": 0.06541982085557667, "learning_rate": 0.00019992392539541496, "loss": 0.8604, "step": 9180 }, { "epoch": 0.16134412472129075, "grad_norm": 0.09425575396858925, "learning_rate": 0.00019992348799088768, "loss": 0.8664, "step": 9190 }, { "epoch": 0.16151968960129215, "grad_norm": 0.07713841512934352, "learning_rate": 0.0001999230493329808, "loss": 0.8594, "step": 9200 }, { "epoch": 0.16169525448129357, "grad_norm": 0.08892005262494483, "learning_rate": 0.00019992260942169998, "loss": 0.8615, "step": 9210 }, { "epoch": 0.16187081936129497, "grad_norm": 0.08648344203153639, "learning_rate": 0.0001999221682570507, "loss": 0.8659, "step": 9220 }, { "epoch": 0.16204638424129636, "grad_norm": 0.07508612605281205, "learning_rate": 0.00019992172583903863, "loss": 0.8588, "step": 9230 }, { "epoch": 0.16222194912129778, "grad_norm": 0.08515763120203633, "learning_rate": 0.00019992128216766932, "loss": 0.8728, "step": 9240 }, { "epoch": 0.16239751400129918, "grad_norm": 0.07636141117605677, "learning_rate": 0.00019992083724294844, "loss": 0.8594, "step": 9250 }, { "epoch": 0.1625730788813006, "grad_norm": 0.07370722395278455, "learning_rate": 0.00019992039106488154, "loss": 0.8577, "step": 9260 }, { "epoch": 0.162748643761302, "grad_norm": 0.06145866029898632, "learning_rate": 0.00019991994363347432, "loss": 0.8626, "step": 9270 }, { "epoch": 0.1629242086413034, "grad_norm": 0.11141764531674557, "learning_rate": 0.0001999194949487325, "loss": 0.8624, "step": 9280 }, { "epoch": 0.1630997735213048, "grad_norm": 0.0584717758167465, "learning_rate": 0.00019991904501066168, "loss": 0.8691, "step": 9290 }, { "epoch": 0.1632753384013062, "grad_norm": 0.07412945180177175, "learning_rate": 0.00019991859381926765, "loss": 0.8715, "step": 9300 }, { "epoch": 0.1634509032813076, "grad_norm": 0.08891954981625651, "learning_rate": 0.00019991814137455604, "loss": 0.8645, "step": 9310 }, { "epoch": 0.16362646816130902, "grad_norm": 0.07560406663715527, "learning_rate": 0.00019991768767653264, "loss": 0.8587, "step": 9320 }, { "epoch": 0.1638020330413104, "grad_norm": 0.06974263696337847, "learning_rate": 0.0001999172327252032, "loss": 0.8718, "step": 9330 }, { "epoch": 0.1639775979213118, "grad_norm": 0.0930068009059249, "learning_rate": 0.00019991677652057344, "loss": 0.8676, "step": 9340 }, { "epoch": 0.16415316280131323, "grad_norm": 0.07334166879139871, "learning_rate": 0.00019991631906264917, "loss": 0.861, "step": 9350 }, { "epoch": 0.16432872768131462, "grad_norm": 0.08040603429290669, "learning_rate": 0.00019991586035143614, "loss": 0.8639, "step": 9360 }, { "epoch": 0.16450429256131605, "grad_norm": 0.08785954461313158, "learning_rate": 0.00019991540038694026, "loss": 0.8661, "step": 9370 }, { "epoch": 0.16467985744131744, "grad_norm": 0.07297271655161192, "learning_rate": 0.00019991493916916725, "loss": 0.8619, "step": 9380 }, { "epoch": 0.16485542232131883, "grad_norm": 0.0872625162816735, "learning_rate": 0.00019991447669812302, "loss": 0.8611, "step": 9390 }, { "epoch": 0.16503098720132026, "grad_norm": 0.058527402906654924, "learning_rate": 0.0001999140129738134, "loss": 0.854, "step": 9400 }, { "epoch": 0.16520655208132165, "grad_norm": 0.07645091298869681, "learning_rate": 0.00019991354799624435, "loss": 0.8596, "step": 9410 }, { "epoch": 0.16538211696132304, "grad_norm": 0.07515946241328171, "learning_rate": 0.00019991308176542166, "loss": 0.8648, "step": 9420 }, { "epoch": 0.16555768184132447, "grad_norm": 0.06370908578843607, "learning_rate": 0.00019991261428135122, "loss": 0.8592, "step": 9430 }, { "epoch": 0.16573324672132586, "grad_norm": 0.06769439804592255, "learning_rate": 0.00019991214554403904, "loss": 0.8591, "step": 9440 }, { "epoch": 0.16590881160132728, "grad_norm": 0.07474513832820746, "learning_rate": 0.000199911675553491, "loss": 0.8647, "step": 9450 }, { "epoch": 0.16608437648132868, "grad_norm": 0.08605403399755807, "learning_rate": 0.00019991120430971307, "loss": 0.8621, "step": 9460 }, { "epoch": 0.16625994136133007, "grad_norm": 0.09090060972933628, "learning_rate": 0.00019991073181271126, "loss": 0.8589, "step": 9470 }, { "epoch": 0.1664355062413315, "grad_norm": 0.06711289597312475, "learning_rate": 0.00019991025806249152, "loss": 0.8638, "step": 9480 }, { "epoch": 0.1666110711213329, "grad_norm": 0.08418296573504533, "learning_rate": 0.0001999097830590598, "loss": 0.8639, "step": 9490 }, { "epoch": 0.16678663600133428, "grad_norm": 0.09523003911502409, "learning_rate": 0.0001999093068024222, "loss": 0.8553, "step": 9500 }, { "epoch": 0.1669622008813357, "grad_norm": 0.0903098734399247, "learning_rate": 0.00019990882929258477, "loss": 0.857, "step": 9510 }, { "epoch": 0.1671377657613371, "grad_norm": 0.08511634410237613, "learning_rate": 0.0001999083505295535, "loss": 0.8599, "step": 9520 }, { "epoch": 0.16731333064133852, "grad_norm": 0.059944215657867214, "learning_rate": 0.0001999078705133344, "loss": 0.871, "step": 9530 }, { "epoch": 0.16748889552133991, "grad_norm": 0.09795039278638061, "learning_rate": 0.0001999073892439337, "loss": 0.8641, "step": 9540 }, { "epoch": 0.1676644604013413, "grad_norm": 0.08748207627566633, "learning_rate": 0.00019990690672135741, "loss": 0.8662, "step": 9550 }, { "epoch": 0.16784002528134273, "grad_norm": 0.08585452623106965, "learning_rate": 0.00019990642294561166, "loss": 0.8586, "step": 9560 }, { "epoch": 0.16801559016134412, "grad_norm": 0.06310994016824001, "learning_rate": 0.00019990593791670253, "loss": 0.8657, "step": 9570 }, { "epoch": 0.16819115504134552, "grad_norm": 0.07297600059616083, "learning_rate": 0.00019990545163463627, "loss": 0.8592, "step": 9580 }, { "epoch": 0.16836671992134694, "grad_norm": 0.08719214700742134, "learning_rate": 0.00019990496409941897, "loss": 0.8635, "step": 9590 }, { "epoch": 0.16854228480134834, "grad_norm": 0.07656986990784974, "learning_rate": 0.0001999044753110568, "loss": 0.8645, "step": 9600 }, { "epoch": 0.16871784968134973, "grad_norm": 0.055809223301214635, "learning_rate": 0.000199903985269556, "loss": 0.8581, "step": 9610 }, { "epoch": 0.16889341456135115, "grad_norm": 0.07736214787856818, "learning_rate": 0.0001999034939749227, "loss": 0.876, "step": 9620 }, { "epoch": 0.16906897944135255, "grad_norm": 0.07789077802051922, "learning_rate": 0.00019990300142716324, "loss": 0.8598, "step": 9630 }, { "epoch": 0.16924454432135397, "grad_norm": 0.05940681679442575, "learning_rate": 0.00019990250762628377, "loss": 0.8636, "step": 9640 }, { "epoch": 0.16942010920135536, "grad_norm": 0.08157368668557072, "learning_rate": 0.00019990201257229055, "loss": 0.8625, "step": 9650 }, { "epoch": 0.16959567408135676, "grad_norm": 0.06871332137266127, "learning_rate": 0.00019990151626518988, "loss": 0.8591, "step": 9660 }, { "epoch": 0.16977123896135818, "grad_norm": 0.09212902300392771, "learning_rate": 0.00019990101870498805, "loss": 0.8602, "step": 9670 }, { "epoch": 0.16994680384135957, "grad_norm": 0.07925498092327785, "learning_rate": 0.00019990051989169142, "loss": 0.855, "step": 9680 }, { "epoch": 0.17012236872136097, "grad_norm": 0.061002231256336664, "learning_rate": 0.00019990001982530617, "loss": 0.8723, "step": 9690 }, { "epoch": 0.1702979336013624, "grad_norm": 0.08544839049453211, "learning_rate": 0.00019989951850583873, "loss": 0.8612, "step": 9700 }, { "epoch": 0.17047349848136378, "grad_norm": 0.10128284114301066, "learning_rate": 0.00019989901593329547, "loss": 0.8617, "step": 9710 }, { "epoch": 0.1706490633613652, "grad_norm": 0.06371088723549555, "learning_rate": 0.00019989851210768268, "loss": 0.8614, "step": 9720 }, { "epoch": 0.1708246282413666, "grad_norm": 0.07405317542913095, "learning_rate": 0.00019989800702900683, "loss": 0.8643, "step": 9730 }, { "epoch": 0.171000193121368, "grad_norm": 0.06520291534634826, "learning_rate": 0.00019989750069727426, "loss": 0.8616, "step": 9740 }, { "epoch": 0.17117575800136942, "grad_norm": 0.07684439854613317, "learning_rate": 0.0001998969931124914, "loss": 0.8551, "step": 9750 }, { "epoch": 0.1713513228813708, "grad_norm": 0.0658949070761131, "learning_rate": 0.0001998964842746647, "loss": 0.8677, "step": 9760 }, { "epoch": 0.1715268877613722, "grad_norm": 0.07815243047259925, "learning_rate": 0.00019989597418380056, "loss": 0.8619, "step": 9770 }, { "epoch": 0.17170245264137363, "grad_norm": 0.08974931153381292, "learning_rate": 0.0001998954628399055, "loss": 0.8682, "step": 9780 }, { "epoch": 0.17187801752137502, "grad_norm": 0.06341055623095249, "learning_rate": 0.00019989495024298594, "loss": 0.8573, "step": 9790 }, { "epoch": 0.17205358240137641, "grad_norm": 0.07561132162507479, "learning_rate": 0.00019989443639304845, "loss": 0.8578, "step": 9800 }, { "epoch": 0.17222914728137784, "grad_norm": 0.07141542897370559, "learning_rate": 0.0001998939212900995, "loss": 0.8614, "step": 9810 }, { "epoch": 0.17240471216137923, "grad_norm": 0.06336151179522205, "learning_rate": 0.0001998934049341456, "loss": 0.868, "step": 9820 }, { "epoch": 0.17258027704138065, "grad_norm": 0.06267936064003009, "learning_rate": 0.0001998928873251933, "loss": 0.8639, "step": 9830 }, { "epoch": 0.17275584192138205, "grad_norm": 0.08223352671706154, "learning_rate": 0.00019989236846324916, "loss": 0.8519, "step": 9840 }, { "epoch": 0.17293140680138344, "grad_norm": 0.08007809677054974, "learning_rate": 0.00019989184834831974, "loss": 0.8639, "step": 9850 }, { "epoch": 0.17310697168138486, "grad_norm": 0.08067924754928234, "learning_rate": 0.00019989132698041166, "loss": 0.8632, "step": 9860 }, { "epoch": 0.17328253656138626, "grad_norm": 0.0872832718969682, "learning_rate": 0.00019989080435953155, "loss": 0.8626, "step": 9870 }, { "epoch": 0.17345810144138765, "grad_norm": 0.09930029191913392, "learning_rate": 0.00019989028048568596, "loss": 0.8635, "step": 9880 }, { "epoch": 0.17363366632138907, "grad_norm": 0.06457660700255355, "learning_rate": 0.0001998897553588816, "loss": 0.8678, "step": 9890 }, { "epoch": 0.17380923120139047, "grad_norm": 0.07163690067302121, "learning_rate": 0.00019988922897912502, "loss": 0.8584, "step": 9900 }, { "epoch": 0.1739847960813919, "grad_norm": 0.07333869228771073, "learning_rate": 0.00019988870134642303, "loss": 0.8584, "step": 9910 }, { "epoch": 0.17416036096139328, "grad_norm": 0.09920749126245873, "learning_rate": 0.0001998881724607822, "loss": 0.852, "step": 9920 }, { "epoch": 0.17433592584139468, "grad_norm": 0.0644264652166726, "learning_rate": 0.0001998876423222093, "loss": 0.872, "step": 9930 }, { "epoch": 0.1745114907213961, "grad_norm": 0.07553336032351063, "learning_rate": 0.000199887110930711, "loss": 0.8566, "step": 9940 }, { "epoch": 0.1746870556013975, "grad_norm": 0.08527588400704994, "learning_rate": 0.00019988657828629407, "loss": 0.8575, "step": 9950 }, { "epoch": 0.1748626204813989, "grad_norm": 0.08141966025606887, "learning_rate": 0.00019988604438896524, "loss": 0.8646, "step": 9960 }, { "epoch": 0.1750381853614003, "grad_norm": 0.07133836051483286, "learning_rate": 0.00019988550923873125, "loss": 0.8675, "step": 9970 }, { "epoch": 0.1752137502414017, "grad_norm": 0.09810695717967664, "learning_rate": 0.00019988497283559892, "loss": 0.8725, "step": 9980 }, { "epoch": 0.17538931512140313, "grad_norm": 0.11828264784501429, "learning_rate": 0.00019988443517957505, "loss": 0.864, "step": 9990 }, { "epoch": 0.17556488000140452, "grad_norm": 0.0981174809533362, "learning_rate": 0.0001998838962706664, "loss": 0.8655, "step": 10000 }, { "epoch": 0.17574044488140592, "grad_norm": 0.07510618966943149, "learning_rate": 0.00019988335610887986, "loss": 0.8572, "step": 10010 }, { "epoch": 0.17591600976140734, "grad_norm": 0.0794045636059406, "learning_rate": 0.00019988281469422226, "loss": 0.8545, "step": 10020 }, { "epoch": 0.17609157464140873, "grad_norm": 0.07908719577811062, "learning_rate": 0.00019988227202670042, "loss": 0.8601, "step": 10030 }, { "epoch": 0.17626713952141013, "grad_norm": 0.07650173189796122, "learning_rate": 0.00019988172810632125, "loss": 0.8686, "step": 10040 }, { "epoch": 0.17644270440141155, "grad_norm": 0.09709808068179498, "learning_rate": 0.00019988118293309164, "loss": 0.8622, "step": 10050 }, { "epoch": 0.17661826928141294, "grad_norm": 0.0652213694594962, "learning_rate": 0.00019988063650701847, "loss": 0.8646, "step": 10060 }, { "epoch": 0.17679383416141434, "grad_norm": 0.08625824341243918, "learning_rate": 0.00019988008882810873, "loss": 0.8657, "step": 10070 }, { "epoch": 0.17696939904141576, "grad_norm": 0.06513734209982208, "learning_rate": 0.00019987953989636929, "loss": 0.865, "step": 10080 }, { "epoch": 0.17714496392141715, "grad_norm": 0.06501170654325465, "learning_rate": 0.00019987898971180712, "loss": 0.8679, "step": 10090 }, { "epoch": 0.17732052880141858, "grad_norm": 0.08797938874798843, "learning_rate": 0.0001998784382744292, "loss": 0.8558, "step": 10100 }, { "epoch": 0.17749609368141997, "grad_norm": 0.0810016334857986, "learning_rate": 0.00019987788558424255, "loss": 0.867, "step": 10110 }, { "epoch": 0.17767165856142136, "grad_norm": 0.06992741931488768, "learning_rate": 0.0001998773316412541, "loss": 0.8624, "step": 10120 }, { "epoch": 0.17784722344142279, "grad_norm": 0.07819834816203552, "learning_rate": 0.00019987677644547092, "loss": 0.8552, "step": 10130 }, { "epoch": 0.17802278832142418, "grad_norm": 0.07305566569865295, "learning_rate": 0.00019987621999690002, "loss": 0.8684, "step": 10140 }, { "epoch": 0.17819835320142557, "grad_norm": 0.06063968947363287, "learning_rate": 0.00019987566229554847, "loss": 0.8566, "step": 10150 }, { "epoch": 0.178373918081427, "grad_norm": 0.06765427977699871, "learning_rate": 0.00019987510334142336, "loss": 0.8683, "step": 10160 }, { "epoch": 0.1785494829614284, "grad_norm": 0.09505738070814497, "learning_rate": 0.00019987454313453168, "loss": 0.8611, "step": 10170 }, { "epoch": 0.1787250478414298, "grad_norm": 0.10091325639644706, "learning_rate": 0.00019987398167488067, "loss": 0.8593, "step": 10180 }, { "epoch": 0.1789006127214312, "grad_norm": 0.05876109115479496, "learning_rate": 0.0001998734189624773, "loss": 0.8661, "step": 10190 }, { "epoch": 0.1790761776014326, "grad_norm": 0.07164693642008405, "learning_rate": 0.0001998728549973288, "loss": 0.8632, "step": 10200 }, { "epoch": 0.17925174248143402, "grad_norm": 0.10223541228131937, "learning_rate": 0.00019987228977944225, "loss": 0.8614, "step": 10210 }, { "epoch": 0.17942730736143542, "grad_norm": 0.10673932630499713, "learning_rate": 0.00019987172330882487, "loss": 0.8652, "step": 10220 }, { "epoch": 0.1796028722414368, "grad_norm": 0.08566428889908309, "learning_rate": 0.00019987115558548377, "loss": 0.8603, "step": 10230 }, { "epoch": 0.17977843712143823, "grad_norm": 0.07638623183843066, "learning_rate": 0.0001998705866094262, "loss": 0.8703, "step": 10240 }, { "epoch": 0.17995400200143963, "grad_norm": 0.06143653191409074, "learning_rate": 0.00019987001638065936, "loss": 0.8636, "step": 10250 }, { "epoch": 0.18012956688144105, "grad_norm": 0.09797499449695742, "learning_rate": 0.00019986944489919044, "loss": 0.8601, "step": 10260 }, { "epoch": 0.18030513176144244, "grad_norm": 0.09177709469051426, "learning_rate": 0.00019986887216502671, "loss": 0.8714, "step": 10270 }, { "epoch": 0.18048069664144384, "grad_norm": 0.07439648074158474, "learning_rate": 0.00019986829817817542, "loss": 0.8658, "step": 10280 }, { "epoch": 0.18065626152144526, "grad_norm": 0.08654006082653591, "learning_rate": 0.00019986772293864387, "loss": 0.8618, "step": 10290 }, { "epoch": 0.18083182640144665, "grad_norm": 0.07237361456581927, "learning_rate": 0.00019986714644643931, "loss": 0.8627, "step": 10300 }, { "epoch": 0.18100739128144805, "grad_norm": 0.09993362603667856, "learning_rate": 0.00019986656870156906, "loss": 0.8651, "step": 10310 }, { "epoch": 0.18118295616144947, "grad_norm": 0.07481642834526049, "learning_rate": 0.0001998659897040404, "loss": 0.8583, "step": 10320 }, { "epoch": 0.18135852104145087, "grad_norm": 0.1152709737564011, "learning_rate": 0.00019986540945386076, "loss": 0.8576, "step": 10330 }, { "epoch": 0.18153408592145226, "grad_norm": 0.069318894860848, "learning_rate": 0.0001998648279510374, "loss": 0.8584, "step": 10340 }, { "epoch": 0.18170965080145368, "grad_norm": 0.09216538914341131, "learning_rate": 0.00019986424519557775, "loss": 0.864, "step": 10350 }, { "epoch": 0.18188521568145508, "grad_norm": 0.07286481899983718, "learning_rate": 0.00019986366118748914, "loss": 0.8645, "step": 10360 }, { "epoch": 0.1820607805614565, "grad_norm": 0.0633612849678101, "learning_rate": 0.000199863075926779, "loss": 0.8594, "step": 10370 }, { "epoch": 0.1822363454414579, "grad_norm": 0.0727252022021232, "learning_rate": 0.00019986248941345473, "loss": 0.8567, "step": 10380 }, { "epoch": 0.1824119103214593, "grad_norm": 0.06255341485020453, "learning_rate": 0.00019986190164752378, "loss": 0.8664, "step": 10390 }, { "epoch": 0.1825874752014607, "grad_norm": 0.07910156387375265, "learning_rate": 0.0001998613126289936, "loss": 0.8636, "step": 10400 }, { "epoch": 0.1827630400814621, "grad_norm": 0.06288713765741527, "learning_rate": 0.00019986072235787163, "loss": 0.8662, "step": 10410 }, { "epoch": 0.1829386049614635, "grad_norm": 0.06922476971570464, "learning_rate": 0.00019986013083416536, "loss": 0.8605, "step": 10420 }, { "epoch": 0.18311416984146492, "grad_norm": 0.09526828707415765, "learning_rate": 0.00019985953805788228, "loss": 0.863, "step": 10430 }, { "epoch": 0.1832897347214663, "grad_norm": 0.07063473366972216, "learning_rate": 0.00019985894402902994, "loss": 0.8691, "step": 10440 }, { "epoch": 0.18346529960146774, "grad_norm": 0.08174686913646441, "learning_rate": 0.00019985834874761582, "loss": 0.8659, "step": 10450 }, { "epoch": 0.18364086448146913, "grad_norm": 0.08622751640065462, "learning_rate": 0.00019985775221364744, "loss": 0.8589, "step": 10460 }, { "epoch": 0.18381642936147052, "grad_norm": 0.0783923089370782, "learning_rate": 0.00019985715442713244, "loss": 0.8604, "step": 10470 }, { "epoch": 0.18399199424147195, "grad_norm": 0.11180507524826966, "learning_rate": 0.00019985655538807832, "loss": 0.8557, "step": 10480 }, { "epoch": 0.18416755912147334, "grad_norm": 0.09258147896363879, "learning_rate": 0.0001998559550964927, "loss": 0.8625, "step": 10490 }, { "epoch": 0.18434312400147473, "grad_norm": 0.07355802101648803, "learning_rate": 0.00019985535355238317, "loss": 0.8665, "step": 10500 }, { "epoch": 0.18451868888147616, "grad_norm": 0.059473917770376444, "learning_rate": 0.00019985475075575744, "loss": 0.8626, "step": 10510 }, { "epoch": 0.18469425376147755, "grad_norm": 0.05486967289995833, "learning_rate": 0.000199854146706623, "loss": 0.8585, "step": 10520 }, { "epoch": 0.18486981864147897, "grad_norm": 0.07351159326943973, "learning_rate": 0.00019985354140498757, "loss": 0.8651, "step": 10530 }, { "epoch": 0.18504538352148037, "grad_norm": 0.06073791995183849, "learning_rate": 0.00019985293485085887, "loss": 0.8592, "step": 10540 }, { "epoch": 0.18522094840148176, "grad_norm": 0.06580712461096797, "learning_rate": 0.0001998523270442445, "loss": 0.8713, "step": 10550 }, { "epoch": 0.18539651328148318, "grad_norm": 0.062035742732636096, "learning_rate": 0.0001998517179851522, "loss": 0.8671, "step": 10560 }, { "epoch": 0.18557207816148458, "grad_norm": 0.08481498201839642, "learning_rate": 0.00019985110767358974, "loss": 0.8699, "step": 10570 }, { "epoch": 0.18574764304148597, "grad_norm": 0.1009857302010947, "learning_rate": 0.00019985049610956476, "loss": 0.8594, "step": 10580 }, { "epoch": 0.1859232079214874, "grad_norm": 0.09539985912911572, "learning_rate": 0.00019984988329308507, "loss": 0.8649, "step": 10590 }, { "epoch": 0.1860987728014888, "grad_norm": 0.08590163307990636, "learning_rate": 0.00019984926922415842, "loss": 0.8637, "step": 10600 }, { "epoch": 0.18627433768149018, "grad_norm": 0.08194726497148588, "learning_rate": 0.00019984865390279254, "loss": 0.8636, "step": 10610 }, { "epoch": 0.1864499025614916, "grad_norm": 0.057813856647796716, "learning_rate": 0.0001998480373289953, "loss": 0.8622, "step": 10620 }, { "epoch": 0.186625467441493, "grad_norm": 0.12474027693734238, "learning_rate": 0.0001998474195027745, "loss": 0.8646, "step": 10630 }, { "epoch": 0.18680103232149442, "grad_norm": 0.05767733550029884, "learning_rate": 0.00019984680042413794, "loss": 0.8722, "step": 10640 }, { "epoch": 0.18697659720149581, "grad_norm": 0.06862991104147567, "learning_rate": 0.00019984618009309346, "loss": 0.8629, "step": 10650 }, { "epoch": 0.1871521620814972, "grad_norm": 0.07427064090856203, "learning_rate": 0.0001998455585096489, "loss": 0.8591, "step": 10660 }, { "epoch": 0.18732772696149863, "grad_norm": 0.06860451823827075, "learning_rate": 0.00019984493567381224, "loss": 0.8636, "step": 10670 }, { "epoch": 0.18750329184150002, "grad_norm": 0.06345813381783684, "learning_rate": 0.00019984431158559128, "loss": 0.8653, "step": 10680 }, { "epoch": 0.18767885672150142, "grad_norm": 0.09711219588180658, "learning_rate": 0.0001998436862449939, "loss": 0.8649, "step": 10690 }, { "epoch": 0.18785442160150284, "grad_norm": 0.08837972293904665, "learning_rate": 0.00019984305965202817, "loss": 0.8652, "step": 10700 }, { "epoch": 0.18802998648150424, "grad_norm": 0.08238708874363787, "learning_rate": 0.00019984243180670185, "loss": 0.859, "step": 10710 }, { "epoch": 0.18820555136150566, "grad_norm": 0.060340071533053825, "learning_rate": 0.000199841802709023, "loss": 0.8622, "step": 10720 }, { "epoch": 0.18838111624150705, "grad_norm": 0.10345411122114005, "learning_rate": 0.00019984117235899958, "loss": 0.863, "step": 10730 }, { "epoch": 0.18855668112150845, "grad_norm": 0.06709925173298303, "learning_rate": 0.00019984054075663954, "loss": 0.8605, "step": 10740 }, { "epoch": 0.18873224600150987, "grad_norm": 0.08781933699617814, "learning_rate": 0.0001998399079019509, "loss": 0.8652, "step": 10750 }, { "epoch": 0.18890781088151126, "grad_norm": 0.054989626672634837, "learning_rate": 0.0001998392737949417, "loss": 0.8578, "step": 10760 }, { "epoch": 0.18908337576151266, "grad_norm": 0.09930997781741459, "learning_rate": 0.00019983863843561997, "loss": 0.8637, "step": 10770 }, { "epoch": 0.18925894064151408, "grad_norm": 0.08787036559542731, "learning_rate": 0.00019983800182399374, "loss": 0.8632, "step": 10780 }, { "epoch": 0.18943450552151547, "grad_norm": 0.06013754318342283, "learning_rate": 0.00019983736396007108, "loss": 0.8543, "step": 10790 }, { "epoch": 0.18961007040151687, "grad_norm": 0.07577025932806009, "learning_rate": 0.0001998367248438601, "loss": 0.869, "step": 10800 }, { "epoch": 0.1897856352815183, "grad_norm": 0.0612473124830451, "learning_rate": 0.00019983608447536884, "loss": 0.8581, "step": 10810 }, { "epoch": 0.18996120016151968, "grad_norm": 0.0930281877903287, "learning_rate": 0.00019983544285460546, "loss": 0.8613, "step": 10820 }, { "epoch": 0.1901367650415211, "grad_norm": 0.06382876186842239, "learning_rate": 0.00019983479998157805, "loss": 0.8593, "step": 10830 }, { "epoch": 0.1903123299215225, "grad_norm": 0.06713671265324865, "learning_rate": 0.0001998341558562948, "loss": 0.8589, "step": 10840 }, { "epoch": 0.1904878948015239, "grad_norm": 0.07192818678303677, "learning_rate": 0.0001998335104787639, "loss": 0.8607, "step": 10850 }, { "epoch": 0.19066345968152532, "grad_norm": 0.07335446635119781, "learning_rate": 0.00019983286384899344, "loss": 0.8652, "step": 10860 }, { "epoch": 0.1908390245615267, "grad_norm": 0.08007270518931298, "learning_rate": 0.00019983221596699165, "loss": 0.8538, "step": 10870 }, { "epoch": 0.1910145894415281, "grad_norm": 0.07802218035351148, "learning_rate": 0.00019983156683276676, "loss": 0.8593, "step": 10880 }, { "epoch": 0.19119015432152953, "grad_norm": 0.07005898170223804, "learning_rate": 0.000199830916446327, "loss": 0.8618, "step": 10890 }, { "epoch": 0.19136571920153092, "grad_norm": 0.08864190491014051, "learning_rate": 0.00019983026480768052, "loss": 0.8652, "step": 10900 }, { "epoch": 0.19154128408153234, "grad_norm": 0.07526426022242097, "learning_rate": 0.0001998296119168357, "loss": 0.8656, "step": 10910 }, { "epoch": 0.19171684896153374, "grad_norm": 0.06433782607527952, "learning_rate": 0.00019982895777380074, "loss": 0.8617, "step": 10920 }, { "epoch": 0.19189241384153513, "grad_norm": 0.0634146377423622, "learning_rate": 0.0001998283023785839, "loss": 0.8595, "step": 10930 }, { "epoch": 0.19206797872153655, "grad_norm": 0.08879654579806198, "learning_rate": 0.0001998276457311936, "loss": 0.862, "step": 10940 }, { "epoch": 0.19224354360153795, "grad_norm": 0.08027176823222054, "learning_rate": 0.00019982698783163806, "loss": 0.8583, "step": 10950 }, { "epoch": 0.19241910848153934, "grad_norm": 0.09072746953058013, "learning_rate": 0.00019982632867992562, "loss": 0.8674, "step": 10960 }, { "epoch": 0.19259467336154076, "grad_norm": 0.06439002486022939, "learning_rate": 0.00019982566827606469, "loss": 0.8637, "step": 10970 }, { "epoch": 0.19277023824154216, "grad_norm": 0.10195226438734444, "learning_rate": 0.0001998250066200636, "loss": 0.8583, "step": 10980 }, { "epoch": 0.19294580312154358, "grad_norm": 0.09107915214181189, "learning_rate": 0.00019982434371193072, "loss": 0.861, "step": 10990 }, { "epoch": 0.19312136800154497, "grad_norm": 0.07710625417041224, "learning_rate": 0.0001998236795516745, "loss": 0.86, "step": 11000 }, { "epoch": 0.19329693288154637, "grad_norm": 0.05664192454959979, "learning_rate": 0.0001998230141393033, "loss": 0.8598, "step": 11010 }, { "epoch": 0.1934724977615478, "grad_norm": 0.06328225386522277, "learning_rate": 0.00019982234747482552, "loss": 0.8667, "step": 11020 }, { "epoch": 0.19364806264154918, "grad_norm": 0.08066251156125577, "learning_rate": 0.00019982167955824974, "loss": 0.8648, "step": 11030 }, { "epoch": 0.19382362752155058, "grad_norm": 0.08480385743337594, "learning_rate": 0.00019982101038958428, "loss": 0.8645, "step": 11040 }, { "epoch": 0.193999192401552, "grad_norm": 0.06109001401566652, "learning_rate": 0.0001998203399688377, "loss": 0.8693, "step": 11050 }, { "epoch": 0.1941747572815534, "grad_norm": 0.06516817049012194, "learning_rate": 0.0001998196682960185, "loss": 0.8638, "step": 11060 }, { "epoch": 0.1943503221615548, "grad_norm": 0.07468404178305846, "learning_rate": 0.00019981899537113514, "loss": 0.8605, "step": 11070 }, { "epoch": 0.1945258870415562, "grad_norm": 0.06518877176803846, "learning_rate": 0.00019981832119419618, "loss": 0.8597, "step": 11080 }, { "epoch": 0.1947014519215576, "grad_norm": 0.09716435378391924, "learning_rate": 0.00019981764576521013, "loss": 0.8594, "step": 11090 }, { "epoch": 0.19487701680155903, "grad_norm": 0.07029988431118245, "learning_rate": 0.00019981696908418558, "loss": 0.859, "step": 11100 }, { "epoch": 0.19505258168156042, "grad_norm": 0.07597425786466074, "learning_rate": 0.0001998162911511311, "loss": 0.8634, "step": 11110 }, { "epoch": 0.19522814656156182, "grad_norm": 0.07103170200190047, "learning_rate": 0.00019981561196605527, "loss": 0.8649, "step": 11120 }, { "epoch": 0.19540371144156324, "grad_norm": 0.08409596394265781, "learning_rate": 0.0001998149315289667, "loss": 0.8614, "step": 11130 }, { "epoch": 0.19557927632156463, "grad_norm": 0.06598690166878926, "learning_rate": 0.00019981424983987403, "loss": 0.8584, "step": 11140 }, { "epoch": 0.19575484120156603, "grad_norm": 0.08066397899992475, "learning_rate": 0.00019981356689878583, "loss": 0.8602, "step": 11150 }, { "epoch": 0.19593040608156745, "grad_norm": 0.10522217700109793, "learning_rate": 0.00019981288270571083, "loss": 0.8638, "step": 11160 }, { "epoch": 0.19610597096156884, "grad_norm": 0.08929543182791812, "learning_rate": 0.00019981219726065768, "loss": 0.8578, "step": 11170 }, { "epoch": 0.19628153584157026, "grad_norm": 0.1062191647129973, "learning_rate": 0.00019981151056363503, "loss": 0.863, "step": 11180 }, { "epoch": 0.19645710072157166, "grad_norm": 0.07284430449164271, "learning_rate": 0.0001998108226146516, "loss": 0.8579, "step": 11190 }, { "epoch": 0.19663266560157305, "grad_norm": 0.0699500337169853, "learning_rate": 0.00019981013341371614, "loss": 0.8567, "step": 11200 }, { "epoch": 0.19680823048157448, "grad_norm": 0.11056444393694914, "learning_rate": 0.00019980944296083732, "loss": 0.8611, "step": 11210 }, { "epoch": 0.19698379536157587, "grad_norm": 0.06689444397647458, "learning_rate": 0.00019980875125602394, "loss": 0.8717, "step": 11220 }, { "epoch": 0.19715936024157726, "grad_norm": 0.0757317716098211, "learning_rate": 0.00019980805829928472, "loss": 0.8622, "step": 11230 }, { "epoch": 0.19733492512157869, "grad_norm": 0.11467404944023815, "learning_rate": 0.00019980736409062853, "loss": 0.8565, "step": 11240 }, { "epoch": 0.19751049000158008, "grad_norm": 0.06896553570119207, "learning_rate": 0.00019980666863006404, "loss": 0.8584, "step": 11250 }, { "epoch": 0.1976860548815815, "grad_norm": 0.07411036410909033, "learning_rate": 0.00019980597191760011, "loss": 0.8538, "step": 11260 }, { "epoch": 0.1978616197615829, "grad_norm": 0.05414463987679983, "learning_rate": 0.00019980527395324562, "loss": 0.8625, "step": 11270 }, { "epoch": 0.1980371846415843, "grad_norm": 0.06654404691204416, "learning_rate": 0.00019980457473700938, "loss": 0.8607, "step": 11280 }, { "epoch": 0.1982127495215857, "grad_norm": 0.05940047775418309, "learning_rate": 0.00019980387426890022, "loss": 0.8604, "step": 11290 }, { "epoch": 0.1983883144015871, "grad_norm": 0.07291847057122354, "learning_rate": 0.00019980317254892705, "loss": 0.8572, "step": 11300 }, { "epoch": 0.1985638792815885, "grad_norm": 0.0502920404382447, "learning_rate": 0.0001998024695770987, "loss": 0.8646, "step": 11310 }, { "epoch": 0.19873944416158992, "grad_norm": 0.10904238733366504, "learning_rate": 0.00019980176535342418, "loss": 0.8555, "step": 11320 }, { "epoch": 0.19891500904159132, "grad_norm": 0.06915380819181416, "learning_rate": 0.00019980105987791235, "loss": 0.8616, "step": 11330 }, { "epoch": 0.1990905739215927, "grad_norm": 0.08383934681825642, "learning_rate": 0.00019980035315057216, "loss": 0.8651, "step": 11340 }, { "epoch": 0.19926613880159413, "grad_norm": 0.07747787055189378, "learning_rate": 0.00019979964517141254, "loss": 0.8533, "step": 11350 }, { "epoch": 0.19944170368159553, "grad_norm": 0.11025951747688771, "learning_rate": 0.0001997989359404425, "loss": 0.8568, "step": 11360 }, { "epoch": 0.19961726856159695, "grad_norm": 0.06865460749006481, "learning_rate": 0.00019979822545767098, "loss": 0.858, "step": 11370 }, { "epoch": 0.19979283344159834, "grad_norm": 0.07287576534441402, "learning_rate": 0.00019979751372310703, "loss": 0.8623, "step": 11380 }, { "epoch": 0.19996839832159974, "grad_norm": 0.06905723856559356, "learning_rate": 0.00019979680073675966, "loss": 0.8626, "step": 11390 }, { "epoch": 0.20014396320160116, "grad_norm": 0.06974754431925725, "learning_rate": 0.00019979608649863787, "loss": 0.8663, "step": 11400 }, { "epoch": 0.20031952808160255, "grad_norm": 0.07174432648102101, "learning_rate": 0.00019979537100875072, "loss": 0.8545, "step": 11410 }, { "epoch": 0.20049509296160395, "grad_norm": 0.05877411251113119, "learning_rate": 0.0001997946542671073, "loss": 0.8635, "step": 11420 }, { "epoch": 0.20067065784160537, "grad_norm": 0.06443476471552287, "learning_rate": 0.00019979393627371668, "loss": 0.8691, "step": 11430 }, { "epoch": 0.20084622272160677, "grad_norm": 0.06711153342882789, "learning_rate": 0.00019979321702858792, "loss": 0.8627, "step": 11440 }, { "epoch": 0.2010217876016082, "grad_norm": 0.05604570745637266, "learning_rate": 0.00019979249653173022, "loss": 0.8558, "step": 11450 }, { "epoch": 0.20119735248160958, "grad_norm": 0.07434165518192266, "learning_rate": 0.00019979177478315262, "loss": 0.8607, "step": 11460 }, { "epoch": 0.20137291736161098, "grad_norm": 0.07289731992796257, "learning_rate": 0.00019979105178286432, "loss": 0.8569, "step": 11470 }, { "epoch": 0.2015484822416124, "grad_norm": 0.07410009019828331, "learning_rate": 0.00019979032753087445, "loss": 0.8661, "step": 11480 }, { "epoch": 0.2017240471216138, "grad_norm": 0.08828807351892376, "learning_rate": 0.0001997896020271922, "loss": 0.8635, "step": 11490 }, { "epoch": 0.2018996120016152, "grad_norm": 0.06020463855990833, "learning_rate": 0.00019978887527182676, "loss": 0.8607, "step": 11500 }, { "epoch": 0.2020751768816166, "grad_norm": 0.07332687669787262, "learning_rate": 0.0001997881472647873, "loss": 0.8598, "step": 11510 }, { "epoch": 0.202250741761618, "grad_norm": 0.06753671941631845, "learning_rate": 0.00019978741800608313, "loss": 0.8505, "step": 11520 }, { "epoch": 0.2024263066416194, "grad_norm": 0.08485753273520577, "learning_rate": 0.0001997866874957234, "loss": 0.8627, "step": 11530 }, { "epoch": 0.20260187152162082, "grad_norm": 0.11056422178464878, "learning_rate": 0.00019978595573371747, "loss": 0.8624, "step": 11540 }, { "epoch": 0.2027774364016222, "grad_norm": 0.08050322495035325, "learning_rate": 0.00019978522272007452, "loss": 0.8675, "step": 11550 }, { "epoch": 0.20295300128162364, "grad_norm": 0.08792617465616541, "learning_rate": 0.00019978448845480386, "loss": 0.8575, "step": 11560 }, { "epoch": 0.20312856616162503, "grad_norm": 0.07745796590646534, "learning_rate": 0.0001997837529379148, "loss": 0.8565, "step": 11570 }, { "epoch": 0.20330413104162642, "grad_norm": 0.06831318790288808, "learning_rate": 0.00019978301616941667, "loss": 0.8637, "step": 11580 }, { "epoch": 0.20347969592162785, "grad_norm": 0.10158133947793363, "learning_rate": 0.00019978227814931878, "loss": 0.8535, "step": 11590 }, { "epoch": 0.20365526080162924, "grad_norm": 0.06764835212078524, "learning_rate": 0.00019978153887763048, "loss": 0.8616, "step": 11600 }, { "epoch": 0.20383082568163063, "grad_norm": 0.07481663328383992, "learning_rate": 0.00019978079835436118, "loss": 0.8646, "step": 11610 }, { "epoch": 0.20400639056163206, "grad_norm": 0.06183354158690388, "learning_rate": 0.0001997800565795202, "loss": 0.8645, "step": 11620 }, { "epoch": 0.20418195544163345, "grad_norm": 0.05399871093881159, "learning_rate": 0.00019977931355311698, "loss": 0.859, "step": 11630 }, { "epoch": 0.20435752032163487, "grad_norm": 0.08299789254881017, "learning_rate": 0.00019977856927516093, "loss": 0.8619, "step": 11640 }, { "epoch": 0.20453308520163627, "grad_norm": 0.07270330571629706, "learning_rate": 0.0001997778237456615, "loss": 0.8637, "step": 11650 }, { "epoch": 0.20470865008163766, "grad_norm": 0.06890626820832899, "learning_rate": 0.00019977707696462808, "loss": 0.8658, "step": 11660 }, { "epoch": 0.20488421496163908, "grad_norm": 0.09422152704997061, "learning_rate": 0.0001997763289320702, "loss": 0.8569, "step": 11670 }, { "epoch": 0.20505977984164048, "grad_norm": 0.0717091126915168, "learning_rate": 0.0001997755796479973, "loss": 0.8598, "step": 11680 }, { "epoch": 0.20523534472164187, "grad_norm": 0.06599919251117033, "learning_rate": 0.00019977482911241883, "loss": 0.8595, "step": 11690 }, { "epoch": 0.2054109096016433, "grad_norm": 0.05899318554996764, "learning_rate": 0.0001997740773253444, "loss": 0.8602, "step": 11700 }, { "epoch": 0.2055864744816447, "grad_norm": 0.05902171827836532, "learning_rate": 0.00019977332428678345, "loss": 0.8633, "step": 11710 }, { "epoch": 0.2057620393616461, "grad_norm": 0.0569672865231465, "learning_rate": 0.00019977256999674557, "loss": 0.8592, "step": 11720 }, { "epoch": 0.2059376042416475, "grad_norm": 0.053822986577173826, "learning_rate": 0.00019977181445524027, "loss": 0.8561, "step": 11730 }, { "epoch": 0.2061131691216489, "grad_norm": 0.06508445265780063, "learning_rate": 0.00019977105766227714, "loss": 0.8616, "step": 11740 }, { "epoch": 0.20628873400165032, "grad_norm": 0.06383980581889599, "learning_rate": 0.00019977029961786583, "loss": 0.865, "step": 11750 }, { "epoch": 0.20646429888165171, "grad_norm": 0.06884783399451656, "learning_rate": 0.00019976954032201587, "loss": 0.857, "step": 11760 }, { "epoch": 0.2066398637616531, "grad_norm": 0.06528916759534233, "learning_rate": 0.0001997687797747369, "loss": 0.8554, "step": 11770 }, { "epoch": 0.20681542864165453, "grad_norm": 0.07876543663359256, "learning_rate": 0.00019976801797603856, "loss": 0.8631, "step": 11780 }, { "epoch": 0.20699099352165592, "grad_norm": 0.07281560909354284, "learning_rate": 0.0001997672549259305, "loss": 0.8466, "step": 11790 }, { "epoch": 0.20716655840165732, "grad_norm": 0.055311249116328634, "learning_rate": 0.00019976649062442242, "loss": 0.8504, "step": 11800 }, { "epoch": 0.20734212328165874, "grad_norm": 0.09716586697776519, "learning_rate": 0.00019976572507152395, "loss": 0.8555, "step": 11810 }, { "epoch": 0.20751768816166014, "grad_norm": 0.06113500635489527, "learning_rate": 0.00019976495826724483, "loss": 0.8647, "step": 11820 }, { "epoch": 0.20769325304166156, "grad_norm": 0.07027966528696562, "learning_rate": 0.00019976419021159475, "loss": 0.8542, "step": 11830 }, { "epoch": 0.20786881792166295, "grad_norm": 0.07100812153254037, "learning_rate": 0.00019976342090458345, "loss": 0.8556, "step": 11840 }, { "epoch": 0.20804438280166435, "grad_norm": 0.0772208897526919, "learning_rate": 0.00019976265034622067, "loss": 0.8631, "step": 11850 }, { "epoch": 0.20821994768166577, "grad_norm": 0.07608892142316544, "learning_rate": 0.00019976187853651623, "loss": 0.8646, "step": 11860 }, { "epoch": 0.20839551256166716, "grad_norm": 0.07452072461172539, "learning_rate": 0.0001997611054754798, "loss": 0.8641, "step": 11870 }, { "epoch": 0.20857107744166856, "grad_norm": 0.08598566015782368, "learning_rate": 0.00019976033116312126, "loss": 0.8574, "step": 11880 }, { "epoch": 0.20874664232166998, "grad_norm": 0.07191413271910875, "learning_rate": 0.00019975955559945038, "loss": 0.8643, "step": 11890 }, { "epoch": 0.20892220720167137, "grad_norm": 0.1335582315344332, "learning_rate": 0.00019975877878447703, "loss": 0.8595, "step": 11900 }, { "epoch": 0.2090977720816728, "grad_norm": 0.09884260485481354, "learning_rate": 0.00019975800071821105, "loss": 0.8642, "step": 11910 }, { "epoch": 0.2092733369616742, "grad_norm": 0.0840633742921738, "learning_rate": 0.0001997572214006622, "loss": 0.8587, "step": 11920 }, { "epoch": 0.20944890184167558, "grad_norm": 0.07193175031098976, "learning_rate": 0.0001997564408318405, "loss": 0.8714, "step": 11930 }, { "epoch": 0.209624466721677, "grad_norm": 0.07566148052754874, "learning_rate": 0.00019975565901175573, "loss": 0.8676, "step": 11940 }, { "epoch": 0.2098000316016784, "grad_norm": 0.07856779267741669, "learning_rate": 0.00019975487594041784, "loss": 0.8666, "step": 11950 }, { "epoch": 0.2099755964816798, "grad_norm": 0.06873151821347684, "learning_rate": 0.00019975409161783676, "loss": 0.8564, "step": 11960 }, { "epoch": 0.21015116136168122, "grad_norm": 0.0749725187738329, "learning_rate": 0.0001997533060440224, "loss": 0.8613, "step": 11970 }, { "epoch": 0.2103267262416826, "grad_norm": 0.06714006955178443, "learning_rate": 0.00019975251921898474, "loss": 0.8574, "step": 11980 }, { "epoch": 0.21050229112168403, "grad_norm": 0.07947337013478468, "learning_rate": 0.0001997517311427337, "loss": 0.8624, "step": 11990 }, { "epoch": 0.21067785600168543, "grad_norm": 0.08850951615386116, "learning_rate": 0.00019975094181527933, "loss": 0.861, "step": 12000 }, { "epoch": 0.21085342088168682, "grad_norm": 0.08457137148210023, "learning_rate": 0.00019975015123663162, "loss": 0.8663, "step": 12010 }, { "epoch": 0.21102898576168824, "grad_norm": 0.06436894671536832, "learning_rate": 0.00019974935940680053, "loss": 0.8672, "step": 12020 }, { "epoch": 0.21120455064168964, "grad_norm": 0.06571936647156754, "learning_rate": 0.00019974856632579613, "loss": 0.8657, "step": 12030 }, { "epoch": 0.21138011552169103, "grad_norm": 0.06963931656300218, "learning_rate": 0.0001997477719936285, "loss": 0.8597, "step": 12040 }, { "epoch": 0.21155568040169245, "grad_norm": 0.08080260915798766, "learning_rate": 0.00019974697641030766, "loss": 0.8608, "step": 12050 }, { "epoch": 0.21173124528169385, "grad_norm": 0.10101367578936203, "learning_rate": 0.00019974617957584366, "loss": 0.86, "step": 12060 }, { "epoch": 0.21190681016169524, "grad_norm": 0.08444507697876563, "learning_rate": 0.00019974538149024673, "loss": 0.8619, "step": 12070 }, { "epoch": 0.21208237504169666, "grad_norm": 0.07394267130563466, "learning_rate": 0.00019974458215352682, "loss": 0.8586, "step": 12080 }, { "epoch": 0.21225793992169806, "grad_norm": 0.1029191545630217, "learning_rate": 0.00019974378156569416, "loss": 0.8596, "step": 12090 }, { "epoch": 0.21243350480169948, "grad_norm": 0.08594674102552544, "learning_rate": 0.00019974297972675885, "loss": 0.8558, "step": 12100 }, { "epoch": 0.21260906968170087, "grad_norm": 0.0927348594819347, "learning_rate": 0.0001997421766367311, "loss": 0.867, "step": 12110 }, { "epoch": 0.21278463456170227, "grad_norm": 0.08588534924362012, "learning_rate": 0.000199741372295621, "loss": 0.8629, "step": 12120 }, { "epoch": 0.2129601994417037, "grad_norm": 0.06768947614597055, "learning_rate": 0.00019974056670343882, "loss": 0.8609, "step": 12130 }, { "epoch": 0.21313576432170508, "grad_norm": 0.0680775308835976, "learning_rate": 0.00019973975986019472, "loss": 0.8595, "step": 12140 }, { "epoch": 0.21331132920170648, "grad_norm": 0.06114018108191955, "learning_rate": 0.00019973895176589895, "loss": 0.8708, "step": 12150 }, { "epoch": 0.2134868940817079, "grad_norm": 0.07272345753240134, "learning_rate": 0.00019973814242056173, "loss": 0.858, "step": 12160 }, { "epoch": 0.2136624589617093, "grad_norm": 0.06421246506344783, "learning_rate": 0.00019973733182419333, "loss": 0.8577, "step": 12170 }, { "epoch": 0.21383802384171072, "grad_norm": 0.07966401394686959, "learning_rate": 0.000199736519976804, "loss": 0.8577, "step": 12180 }, { "epoch": 0.2140135887217121, "grad_norm": 0.064671842191974, "learning_rate": 0.00019973570687840405, "loss": 0.8652, "step": 12190 }, { "epoch": 0.2141891536017135, "grad_norm": 0.07796808793021955, "learning_rate": 0.00019973489252900377, "loss": 0.8635, "step": 12200 }, { "epoch": 0.21436471848171493, "grad_norm": 0.05500961954160095, "learning_rate": 0.00019973407692861346, "loss": 0.8657, "step": 12210 }, { "epoch": 0.21454028336171632, "grad_norm": 0.08682530409346874, "learning_rate": 0.0001997332600772435, "loss": 0.8627, "step": 12220 }, { "epoch": 0.21471584824171772, "grad_norm": 0.0707245697063651, "learning_rate": 0.0001997324419749042, "loss": 0.8571, "step": 12230 }, { "epoch": 0.21489141312171914, "grad_norm": 0.09458535843728595, "learning_rate": 0.00019973162262160594, "loss": 0.8656, "step": 12240 }, { "epoch": 0.21506697800172053, "grad_norm": 0.08256330933791954, "learning_rate": 0.0001997308020173591, "loss": 0.8599, "step": 12250 }, { "epoch": 0.21524254288172193, "grad_norm": 0.12645388072969208, "learning_rate": 0.00019972998016217408, "loss": 0.8644, "step": 12260 }, { "epoch": 0.21541810776172335, "grad_norm": 0.06369345063309637, "learning_rate": 0.00019972915705606127, "loss": 0.8574, "step": 12270 }, { "epoch": 0.21559367264172474, "grad_norm": 0.06748337639120253, "learning_rate": 0.00019972833269903115, "loss": 0.8524, "step": 12280 }, { "epoch": 0.21576923752172616, "grad_norm": 0.06381258224809104, "learning_rate": 0.00019972750709109413, "loss": 0.868, "step": 12290 }, { "epoch": 0.21594480240172756, "grad_norm": 0.08841314655687235, "learning_rate": 0.00019972668023226063, "loss": 0.8642, "step": 12300 }, { "epoch": 0.21612036728172895, "grad_norm": 0.06914650960148769, "learning_rate": 0.00019972585212254125, "loss": 0.8642, "step": 12310 }, { "epoch": 0.21629593216173038, "grad_norm": 0.07303558688391464, "learning_rate": 0.00019972502276194637, "loss": 0.8618, "step": 12320 }, { "epoch": 0.21647149704173177, "grad_norm": 0.07985173937184216, "learning_rate": 0.0001997241921504865, "loss": 0.8656, "step": 12330 }, { "epoch": 0.21664706192173316, "grad_norm": 0.09654733341280401, "learning_rate": 0.0001997233602881722, "loss": 0.864, "step": 12340 }, { "epoch": 0.21682262680173459, "grad_norm": 0.06110114775268228, "learning_rate": 0.000199722527175014, "loss": 0.86, "step": 12350 }, { "epoch": 0.21699819168173598, "grad_norm": 0.07355724160720263, "learning_rate": 0.0001997216928110225, "loss": 0.8624, "step": 12360 }, { "epoch": 0.2171737565617374, "grad_norm": 0.0519312159382691, "learning_rate": 0.0001997208571962082, "loss": 0.8604, "step": 12370 }, { "epoch": 0.2173493214417388, "grad_norm": 0.07711396829642897, "learning_rate": 0.00019972002033058175, "loss": 0.8609, "step": 12380 }, { "epoch": 0.2175248863217402, "grad_norm": 0.06496701853996166, "learning_rate": 0.00019971918221415368, "loss": 0.8612, "step": 12390 }, { "epoch": 0.2177004512017416, "grad_norm": 0.07147394500416392, "learning_rate": 0.0001997183428469347, "loss": 0.86, "step": 12400 }, { "epoch": 0.217876016081743, "grad_norm": 0.08072310608274053, "learning_rate": 0.00019971750222893536, "loss": 0.865, "step": 12410 }, { "epoch": 0.2180515809617444, "grad_norm": 0.061596285709073675, "learning_rate": 0.00019971666036016635, "loss": 0.8583, "step": 12420 }, { "epoch": 0.21822714584174582, "grad_norm": 0.07435532849128003, "learning_rate": 0.00019971581724063836, "loss": 0.8668, "step": 12430 }, { "epoch": 0.21840271072174722, "grad_norm": 0.07860869212431895, "learning_rate": 0.00019971497287036202, "loss": 0.8552, "step": 12440 }, { "epoch": 0.21857827560174864, "grad_norm": 0.07657208275967126, "learning_rate": 0.00019971412724934805, "loss": 0.856, "step": 12450 }, { "epoch": 0.21875384048175003, "grad_norm": 0.057294883032555526, "learning_rate": 0.00019971328037760722, "loss": 0.8731, "step": 12460 }, { "epoch": 0.21892940536175143, "grad_norm": 0.06237115675749068, "learning_rate": 0.00019971243225515016, "loss": 0.8689, "step": 12470 }, { "epoch": 0.21910497024175285, "grad_norm": 0.07578260272482426, "learning_rate": 0.00019971158288198767, "loss": 0.8623, "step": 12480 }, { "epoch": 0.21928053512175424, "grad_norm": 0.07574743210172352, "learning_rate": 0.00019971073225813054, "loss": 0.8592, "step": 12490 }, { "epoch": 0.21945610000175564, "grad_norm": 0.06403675321513853, "learning_rate": 0.00019970988038358947, "loss": 0.8643, "step": 12500 }, { "epoch": 0.21963166488175706, "grad_norm": 0.05222275074125202, "learning_rate": 0.00019970902725837533, "loss": 0.8596, "step": 12510 }, { "epoch": 0.21980722976175845, "grad_norm": 0.0952052446680442, "learning_rate": 0.0001997081728824989, "loss": 0.8668, "step": 12520 }, { "epoch": 0.21998279464175985, "grad_norm": 0.05646238437575283, "learning_rate": 0.00019970731725597098, "loss": 0.8697, "step": 12530 }, { "epoch": 0.22015835952176127, "grad_norm": 0.08764024551743951, "learning_rate": 0.00019970646037880242, "loss": 0.8566, "step": 12540 }, { "epoch": 0.22033392440176267, "grad_norm": 0.0765120318770736, "learning_rate": 0.0001997056022510041, "loss": 0.8567, "step": 12550 }, { "epoch": 0.2205094892817641, "grad_norm": 0.0851652080867763, "learning_rate": 0.00019970474287258692, "loss": 0.8604, "step": 12560 }, { "epoch": 0.22068505416176548, "grad_norm": 0.06982621236973152, "learning_rate": 0.0001997038822435617, "loss": 0.8584, "step": 12570 }, { "epoch": 0.22086061904176688, "grad_norm": 0.07995651231900665, "learning_rate": 0.00019970302036393937, "loss": 0.8638, "step": 12580 }, { "epoch": 0.2210361839217683, "grad_norm": 0.06480861525741188, "learning_rate": 0.00019970215723373087, "loss": 0.858, "step": 12590 }, { "epoch": 0.2212117488017697, "grad_norm": 0.07618966053396582, "learning_rate": 0.00019970129285294712, "loss": 0.8584, "step": 12600 }, { "epoch": 0.2213873136817711, "grad_norm": 0.049028905927910045, "learning_rate": 0.00019970042722159908, "loss": 0.86, "step": 12610 }, { "epoch": 0.2215628785617725, "grad_norm": 0.06602833397277362, "learning_rate": 0.0001996995603396977, "loss": 0.8631, "step": 12620 }, { "epoch": 0.2217384434417739, "grad_norm": 0.06458237149426653, "learning_rate": 0.00019969869220725397, "loss": 0.8551, "step": 12630 }, { "epoch": 0.22191400832177532, "grad_norm": 0.07705687012468981, "learning_rate": 0.00019969782282427888, "loss": 0.8578, "step": 12640 }, { "epoch": 0.22208957320177672, "grad_norm": 0.06688938074370633, "learning_rate": 0.00019969695219078352, "loss": 0.8566, "step": 12650 }, { "epoch": 0.2222651380817781, "grad_norm": 0.07245910137021633, "learning_rate": 0.0001996960803067788, "loss": 0.8579, "step": 12660 }, { "epoch": 0.22244070296177954, "grad_norm": 0.05366959463862416, "learning_rate": 0.00019969520717227587, "loss": 0.8657, "step": 12670 }, { "epoch": 0.22261626784178093, "grad_norm": 0.0671181956840021, "learning_rate": 0.0001996943327872857, "loss": 0.8582, "step": 12680 }, { "epoch": 0.22279183272178232, "grad_norm": 0.06790553814166925, "learning_rate": 0.00019969345715181947, "loss": 0.8598, "step": 12690 }, { "epoch": 0.22296739760178375, "grad_norm": 0.08797915280816915, "learning_rate": 0.0001996925802658882, "loss": 0.8552, "step": 12700 }, { "epoch": 0.22314296248178514, "grad_norm": 0.08025084909632477, "learning_rate": 0.00019969170212950303, "loss": 0.8596, "step": 12710 }, { "epoch": 0.22331852736178656, "grad_norm": 0.06973590331103854, "learning_rate": 0.00019969082274267508, "loss": 0.8561, "step": 12720 }, { "epoch": 0.22349409224178796, "grad_norm": 0.10077642434926494, "learning_rate": 0.00019968994210541548, "loss": 0.8638, "step": 12730 }, { "epoch": 0.22366965712178935, "grad_norm": 0.0684252453798082, "learning_rate": 0.00019968906021773539, "loss": 0.8571, "step": 12740 }, { "epoch": 0.22384522200179077, "grad_norm": 0.05483887410424092, "learning_rate": 0.00019968817707964603, "loss": 0.8582, "step": 12750 }, { "epoch": 0.22402078688179217, "grad_norm": 0.08146992809927943, "learning_rate": 0.00019968729269115854, "loss": 0.8673, "step": 12760 }, { "epoch": 0.22419635176179356, "grad_norm": 0.06169269993917642, "learning_rate": 0.00019968640705228413, "loss": 0.855, "step": 12770 }, { "epoch": 0.22437191664179498, "grad_norm": 0.11292028485617943, "learning_rate": 0.00019968552016303405, "loss": 0.8597, "step": 12780 }, { "epoch": 0.22454748152179638, "grad_norm": 0.08681227251317894, "learning_rate": 0.00019968463202341946, "loss": 0.8586, "step": 12790 }, { "epoch": 0.22472304640179777, "grad_norm": 0.07958678278893613, "learning_rate": 0.0001996837426334517, "loss": 0.859, "step": 12800 }, { "epoch": 0.2248986112817992, "grad_norm": 0.08250478589411411, "learning_rate": 0.00019968285199314202, "loss": 0.8555, "step": 12810 }, { "epoch": 0.2250741761618006, "grad_norm": 0.06746346418802676, "learning_rate": 0.0001996819601025017, "loss": 0.8577, "step": 12820 }, { "epoch": 0.225249741041802, "grad_norm": 0.0660527416449139, "learning_rate": 0.000199681066961542, "loss": 0.8591, "step": 12830 }, { "epoch": 0.2254253059218034, "grad_norm": 0.05955655621897878, "learning_rate": 0.0001996801725702743, "loss": 0.8551, "step": 12840 }, { "epoch": 0.2256008708018048, "grad_norm": 0.06336162683197806, "learning_rate": 0.0001996792769287099, "loss": 0.8566, "step": 12850 }, { "epoch": 0.22577643568180622, "grad_norm": 0.09230503136106817, "learning_rate": 0.00019967838003686014, "loss": 0.8508, "step": 12860 }, { "epoch": 0.22595200056180761, "grad_norm": 0.08172690090519831, "learning_rate": 0.0001996774818947364, "loss": 0.8538, "step": 12870 }, { "epoch": 0.226127565441809, "grad_norm": 0.06363898315501915, "learning_rate": 0.00019967658250235004, "loss": 0.8656, "step": 12880 }, { "epoch": 0.22630313032181043, "grad_norm": 0.05474744709508439, "learning_rate": 0.00019967568185971245, "loss": 0.8625, "step": 12890 }, { "epoch": 0.22647869520181182, "grad_norm": 0.07630171477988831, "learning_rate": 0.0001996747799668351, "loss": 0.861, "step": 12900 }, { "epoch": 0.22665426008181325, "grad_norm": 0.08011111797927585, "learning_rate": 0.00019967387682372938, "loss": 0.8581, "step": 12910 }, { "epoch": 0.22682982496181464, "grad_norm": 0.10084510686913023, "learning_rate": 0.0001996729724304067, "loss": 0.8472, "step": 12920 }, { "epoch": 0.22700538984181604, "grad_norm": 0.06920932098291765, "learning_rate": 0.00019967206678687857, "loss": 0.8629, "step": 12930 }, { "epoch": 0.22718095472181746, "grad_norm": 0.09757733013923876, "learning_rate": 0.00019967115989315642, "loss": 0.8625, "step": 12940 }, { "epoch": 0.22735651960181885, "grad_norm": 0.06528613200478335, "learning_rate": 0.00019967025174925178, "loss": 0.8539, "step": 12950 }, { "epoch": 0.22753208448182025, "grad_norm": 0.07619606879973376, "learning_rate": 0.00019966934235517617, "loss": 0.8573, "step": 12960 }, { "epoch": 0.22770764936182167, "grad_norm": 0.059010347342820214, "learning_rate": 0.00019966843171094102, "loss": 0.8656, "step": 12970 }, { "epoch": 0.22788321424182306, "grad_norm": 0.05274377167490917, "learning_rate": 0.000199667519816558, "loss": 0.8572, "step": 12980 }, { "epoch": 0.22805877912182446, "grad_norm": 0.07310528239007595, "learning_rate": 0.00019966660667203852, "loss": 0.8648, "step": 12990 }, { "epoch": 0.22823434400182588, "grad_norm": 0.09604751435095704, "learning_rate": 0.00019966569227739426, "loss": 0.8553, "step": 13000 }, { "epoch": 0.22840990888182727, "grad_norm": 0.07868724830386786, "learning_rate": 0.00019966477663263679, "loss": 0.8638, "step": 13010 }, { "epoch": 0.2285854737618287, "grad_norm": 0.0807939148269027, "learning_rate": 0.00019966385973777765, "loss": 0.859, "step": 13020 }, { "epoch": 0.2287610386418301, "grad_norm": 0.06024600585240205, "learning_rate": 0.00019966294159282853, "loss": 0.8583, "step": 13030 }, { "epoch": 0.22893660352183148, "grad_norm": 0.05430868582058863, "learning_rate": 0.00019966202219780102, "loss": 0.8612, "step": 13040 }, { "epoch": 0.2291121684018329, "grad_norm": 0.059345834687725835, "learning_rate": 0.0001996611015527068, "loss": 0.8614, "step": 13050 }, { "epoch": 0.2292877332818343, "grad_norm": 0.07445285317788591, "learning_rate": 0.00019966017965755746, "loss": 0.8554, "step": 13060 }, { "epoch": 0.2294632981618357, "grad_norm": 0.08559837819380844, "learning_rate": 0.0001996592565123648, "loss": 0.8597, "step": 13070 }, { "epoch": 0.22963886304183712, "grad_norm": 0.10292629028492462, "learning_rate": 0.0001996583321171404, "loss": 0.8587, "step": 13080 }, { "epoch": 0.2298144279218385, "grad_norm": 0.09883399129712846, "learning_rate": 0.00019965740647189605, "loss": 0.8628, "step": 13090 }, { "epoch": 0.22998999280183993, "grad_norm": 0.06443772944297897, "learning_rate": 0.00019965647957664347, "loss": 0.8571, "step": 13100 }, { "epoch": 0.23016555768184133, "grad_norm": 0.08160493221035, "learning_rate": 0.00019965555143139436, "loss": 0.8638, "step": 13110 }, { "epoch": 0.23034112256184272, "grad_norm": 0.06517730571015831, "learning_rate": 0.0001996546220361605, "loss": 0.8633, "step": 13120 }, { "epoch": 0.23051668744184414, "grad_norm": 0.06574610269686215, "learning_rate": 0.00019965369139095368, "loss": 0.8684, "step": 13130 }, { "epoch": 0.23069225232184554, "grad_norm": 0.06441751554963561, "learning_rate": 0.0001996527594957857, "loss": 0.8664, "step": 13140 }, { "epoch": 0.23086781720184693, "grad_norm": 0.059154123938783665, "learning_rate": 0.00019965182635066836, "loss": 0.8596, "step": 13150 }, { "epoch": 0.23104338208184835, "grad_norm": 0.09180230055547017, "learning_rate": 0.00019965089195561343, "loss": 0.8657, "step": 13160 }, { "epoch": 0.23121894696184975, "grad_norm": 0.07403483364189829, "learning_rate": 0.0001996499563106328, "loss": 0.8632, "step": 13170 }, { "epoch": 0.23139451184185117, "grad_norm": 0.11631070950437476, "learning_rate": 0.00019964901941573834, "loss": 0.8585, "step": 13180 }, { "epoch": 0.23157007672185256, "grad_norm": 0.08959191594891236, "learning_rate": 0.0001996480812709419, "loss": 0.8671, "step": 13190 }, { "epoch": 0.23174564160185396, "grad_norm": 0.06440500271621241, "learning_rate": 0.00019964714187625535, "loss": 0.8567, "step": 13200 }, { "epoch": 0.23192120648185538, "grad_norm": 0.08028648687117654, "learning_rate": 0.00019964620123169062, "loss": 0.8535, "step": 13210 }, { "epoch": 0.23209677136185677, "grad_norm": 0.0908538771144053, "learning_rate": 0.00019964525933725962, "loss": 0.8603, "step": 13220 }, { "epoch": 0.23227233624185817, "grad_norm": 0.061861057542006596, "learning_rate": 0.00019964431619297424, "loss": 0.8639, "step": 13230 }, { "epoch": 0.2324479011218596, "grad_norm": 0.07480407813581291, "learning_rate": 0.00019964337179884648, "loss": 0.8595, "step": 13240 }, { "epoch": 0.23262346600186098, "grad_norm": 0.09391368857339884, "learning_rate": 0.00019964242615488833, "loss": 0.8603, "step": 13250 }, { "epoch": 0.23279903088186238, "grad_norm": 0.07434536311714536, "learning_rate": 0.0001996414792611117, "loss": 0.8604, "step": 13260 }, { "epoch": 0.2329745957618638, "grad_norm": 0.0713160482851545, "learning_rate": 0.00019964053111752865, "loss": 0.8569, "step": 13270 }, { "epoch": 0.2331501606418652, "grad_norm": 0.06815794896120089, "learning_rate": 0.00019963958172415115, "loss": 0.8671, "step": 13280 }, { "epoch": 0.23332572552186662, "grad_norm": 0.09860457256832716, "learning_rate": 0.00019963863108099123, "loss": 0.8578, "step": 13290 }, { "epoch": 0.233501290401868, "grad_norm": 0.0817229894738131, "learning_rate": 0.00019963767918806098, "loss": 0.8501, "step": 13300 }, { "epoch": 0.2336768552818694, "grad_norm": 0.06633499747993062, "learning_rate": 0.0001996367260453724, "loss": 0.8583, "step": 13310 }, { "epoch": 0.23385242016187083, "grad_norm": 0.06309723602880987, "learning_rate": 0.00019963577165293763, "loss": 0.86, "step": 13320 }, { "epoch": 0.23402798504187222, "grad_norm": 0.08032290564622807, "learning_rate": 0.0001996348160107687, "loss": 0.8606, "step": 13330 }, { "epoch": 0.23420354992187362, "grad_norm": 0.08485843992118826, "learning_rate": 0.0001996338591188778, "loss": 0.8666, "step": 13340 }, { "epoch": 0.23437911480187504, "grad_norm": 0.059068062789073256, "learning_rate": 0.00019963290097727693, "loss": 0.8603, "step": 13350 }, { "epoch": 0.23455467968187643, "grad_norm": 0.0770980751586183, "learning_rate": 0.00019963194158597838, "loss": 0.8573, "step": 13360 }, { "epoch": 0.23473024456187785, "grad_norm": 0.057733363725603934, "learning_rate": 0.00019963098094499416, "loss": 0.8554, "step": 13370 }, { "epoch": 0.23490580944187925, "grad_norm": 0.059937879996093374, "learning_rate": 0.00019963001905433657, "loss": 0.8592, "step": 13380 }, { "epoch": 0.23508137432188064, "grad_norm": 0.0632224404908158, "learning_rate": 0.00019962905591401766, "loss": 0.8578, "step": 13390 }, { "epoch": 0.23525693920188206, "grad_norm": 0.08170670551388819, "learning_rate": 0.00019962809152404977, "loss": 0.8566, "step": 13400 }, { "epoch": 0.23543250408188346, "grad_norm": 0.07861434380510587, "learning_rate": 0.00019962712588444503, "loss": 0.8617, "step": 13410 }, { "epoch": 0.23560806896188485, "grad_norm": 0.08039197674765287, "learning_rate": 0.0001996261589952157, "loss": 0.8576, "step": 13420 }, { "epoch": 0.23578363384188628, "grad_norm": 0.09240022885979778, "learning_rate": 0.00019962519085637407, "loss": 0.865, "step": 13430 }, { "epoch": 0.23595919872188767, "grad_norm": 0.07986316395702651, "learning_rate": 0.00019962422146793234, "loss": 0.8565, "step": 13440 }, { "epoch": 0.2361347636018891, "grad_norm": 0.06574640524381067, "learning_rate": 0.00019962325082990284, "loss": 0.8602, "step": 13450 }, { "epoch": 0.23631032848189049, "grad_norm": 0.09117917135106308, "learning_rate": 0.00019962227894229783, "loss": 0.8567, "step": 13460 }, { "epoch": 0.23648589336189188, "grad_norm": 0.05627202574634041, "learning_rate": 0.00019962130580512965, "loss": 0.8615, "step": 13470 }, { "epoch": 0.2366614582418933, "grad_norm": 0.05519988425747287, "learning_rate": 0.00019962033141841064, "loss": 0.8668, "step": 13480 }, { "epoch": 0.2368370231218947, "grad_norm": 0.06553942555227904, "learning_rate": 0.0001996193557821531, "loss": 0.8623, "step": 13490 }, { "epoch": 0.2370125880018961, "grad_norm": 0.07534788219067134, "learning_rate": 0.00019961837889636942, "loss": 0.8549, "step": 13500 }, { "epoch": 0.2371881528818975, "grad_norm": 0.11385461067284922, "learning_rate": 0.00019961740076107203, "loss": 0.8501, "step": 13510 }, { "epoch": 0.2373637177618989, "grad_norm": 0.06704085408675474, "learning_rate": 0.00019961642137627324, "loss": 0.8582, "step": 13520 }, { "epoch": 0.2375392826419003, "grad_norm": 0.08298062875200847, "learning_rate": 0.00019961544074198552, "loss": 0.857, "step": 13530 }, { "epoch": 0.23771484752190172, "grad_norm": 0.06494741095446872, "learning_rate": 0.00019961445885822123, "loss": 0.8561, "step": 13540 }, { "epoch": 0.23789041240190312, "grad_norm": 0.053319854546262105, "learning_rate": 0.00019961347572499286, "loss": 0.8657, "step": 13550 }, { "epoch": 0.23806597728190454, "grad_norm": 0.052530665080692034, "learning_rate": 0.00019961249134231284, "loss": 0.8636, "step": 13560 }, { "epoch": 0.23824154216190593, "grad_norm": 0.10420331434630822, "learning_rate": 0.0001996115057101937, "loss": 0.8605, "step": 13570 }, { "epoch": 0.23841710704190733, "grad_norm": 0.07513008704617051, "learning_rate": 0.00019961051882864782, "loss": 0.8626, "step": 13580 }, { "epoch": 0.23859267192190875, "grad_norm": 0.07083012319519245, "learning_rate": 0.00019960953069768781, "loss": 0.8547, "step": 13590 }, { "epoch": 0.23876823680191014, "grad_norm": 0.07477453054855489, "learning_rate": 0.0001996085413173261, "loss": 0.8615, "step": 13600 }, { "epoch": 0.23894380168191154, "grad_norm": 0.07064858969473756, "learning_rate": 0.00019960755068757532, "loss": 0.8597, "step": 13610 }, { "epoch": 0.23911936656191296, "grad_norm": 0.06942207105254398, "learning_rate": 0.00019960655880844796, "loss": 0.856, "step": 13620 }, { "epoch": 0.23929493144191435, "grad_norm": 0.05685954542760512, "learning_rate": 0.00019960556567995662, "loss": 0.8626, "step": 13630 }, { "epoch": 0.23947049632191578, "grad_norm": 0.06726683239476987, "learning_rate": 0.00019960457130211386, "loss": 0.8635, "step": 13640 }, { "epoch": 0.23964606120191717, "grad_norm": 0.06784527467547917, "learning_rate": 0.00019960357567493228, "loss": 0.8678, "step": 13650 }, { "epoch": 0.23982162608191857, "grad_norm": 0.07891725868316742, "learning_rate": 0.0001996025787984245, "loss": 0.8626, "step": 13660 }, { "epoch": 0.23999719096192, "grad_norm": 0.08630732144929593, "learning_rate": 0.00019960158067260312, "loss": 0.8585, "step": 13670 }, { "epoch": 0.24017275584192138, "grad_norm": 0.06865507076556479, "learning_rate": 0.00019960058129748085, "loss": 0.8639, "step": 13680 }, { "epoch": 0.24034832072192278, "grad_norm": 0.06533006848821106, "learning_rate": 0.00019959958067307032, "loss": 0.8584, "step": 13690 }, { "epoch": 0.2405238856019242, "grad_norm": 0.09544176297944504, "learning_rate": 0.0001995985787993842, "loss": 0.8646, "step": 13700 }, { "epoch": 0.2406994504819256, "grad_norm": 0.07944397864382828, "learning_rate": 0.00019959757567643519, "loss": 0.8632, "step": 13710 }, { "epoch": 0.240875015361927, "grad_norm": 0.0724567204717732, "learning_rate": 0.00019959657130423598, "loss": 0.8555, "step": 13720 }, { "epoch": 0.2410505802419284, "grad_norm": 0.07475838880427423, "learning_rate": 0.00019959556568279932, "loss": 0.869, "step": 13730 }, { "epoch": 0.2412261451219298, "grad_norm": 0.0800508272004524, "learning_rate": 0.000199594558812138, "loss": 0.8662, "step": 13740 }, { "epoch": 0.24140171000193122, "grad_norm": 0.05910746291538407, "learning_rate": 0.00019959355069226463, "loss": 0.867, "step": 13750 }, { "epoch": 0.24157727488193262, "grad_norm": 0.06406163985301921, "learning_rate": 0.00019959254132319215, "loss": 0.8638, "step": 13760 }, { "epoch": 0.241752839761934, "grad_norm": 0.07003991908556866, "learning_rate": 0.00019959153070493325, "loss": 0.8642, "step": 13770 }, { "epoch": 0.24192840464193544, "grad_norm": 0.0948217313141039, "learning_rate": 0.00019959051883750077, "loss": 0.8598, "step": 13780 }, { "epoch": 0.24210396952193683, "grad_norm": 0.06983295127303583, "learning_rate": 0.00019958950572090752, "loss": 0.8567, "step": 13790 }, { "epoch": 0.24227953440193822, "grad_norm": 0.055347991214125467, "learning_rate": 0.00019958849135516635, "loss": 0.8616, "step": 13800 }, { "epoch": 0.24245509928193965, "grad_norm": 0.05283474101240031, "learning_rate": 0.00019958747574029006, "loss": 0.8635, "step": 13810 }, { "epoch": 0.24263066416194104, "grad_norm": 0.09519078802638069, "learning_rate": 0.0001995864588762916, "loss": 0.8598, "step": 13820 }, { "epoch": 0.24280622904194246, "grad_norm": 0.07189266335151935, "learning_rate": 0.00019958544076318377, "loss": 0.8677, "step": 13830 }, { "epoch": 0.24298179392194386, "grad_norm": 0.055292437018689106, "learning_rate": 0.00019958442140097956, "loss": 0.8526, "step": 13840 }, { "epoch": 0.24315735880194525, "grad_norm": 0.08381916632876933, "learning_rate": 0.00019958340078969183, "loss": 0.8661, "step": 13850 }, { "epoch": 0.24333292368194667, "grad_norm": 0.10620086717544801, "learning_rate": 0.00019958237892933348, "loss": 0.8608, "step": 13860 }, { "epoch": 0.24350848856194807, "grad_norm": 0.07391684586630098, "learning_rate": 0.00019958135581991753, "loss": 0.8531, "step": 13870 }, { "epoch": 0.24368405344194946, "grad_norm": 0.06797082916127659, "learning_rate": 0.00019958033146145692, "loss": 0.859, "step": 13880 }, { "epoch": 0.24385961832195088, "grad_norm": 0.062085425271156326, "learning_rate": 0.00019957930585396457, "loss": 0.8607, "step": 13890 }, { "epoch": 0.24403518320195228, "grad_norm": 0.06447247822345926, "learning_rate": 0.00019957827899745354, "loss": 0.8604, "step": 13900 }, { "epoch": 0.2442107480819537, "grad_norm": 0.06929102607381335, "learning_rate": 0.00019957725089193685, "loss": 0.8651, "step": 13910 }, { "epoch": 0.2443863129619551, "grad_norm": 0.08457797576370008, "learning_rate": 0.00019957622153742748, "loss": 0.8667, "step": 13920 }, { "epoch": 0.2445618778419565, "grad_norm": 0.07543369237807052, "learning_rate": 0.00019957519093393846, "loss": 0.8518, "step": 13930 }, { "epoch": 0.2447374427219579, "grad_norm": 0.05197133800374198, "learning_rate": 0.00019957415908148293, "loss": 0.8661, "step": 13940 }, { "epoch": 0.2449130076019593, "grad_norm": 0.06474494494041087, "learning_rate": 0.0001995731259800739, "loss": 0.8597, "step": 13950 }, { "epoch": 0.2450885724819607, "grad_norm": 0.05507811973767036, "learning_rate": 0.00019957209162972444, "loss": 0.8558, "step": 13960 }, { "epoch": 0.24526413736196212, "grad_norm": 0.06449006102083141, "learning_rate": 0.00019957105603044773, "loss": 0.8606, "step": 13970 }, { "epoch": 0.24543970224196351, "grad_norm": 0.06948425427336218, "learning_rate": 0.00019957001918225685, "loss": 0.8644, "step": 13980 }, { "epoch": 0.2456152671219649, "grad_norm": 0.07111062370139126, "learning_rate": 0.0001995689810851649, "loss": 0.8595, "step": 13990 }, { "epoch": 0.24579083200196633, "grad_norm": 0.08079611216972363, "learning_rate": 0.00019956794173918508, "loss": 0.8565, "step": 14000 }, { "epoch": 0.24596639688196772, "grad_norm": 0.06636650925602623, "learning_rate": 0.00019956690114433058, "loss": 0.8627, "step": 14010 }, { "epoch": 0.24614196176196915, "grad_norm": 0.06888002936063668, "learning_rate": 0.00019956585930061451, "loss": 0.8692, "step": 14020 }, { "epoch": 0.24631752664197054, "grad_norm": 0.0675547970497489, "learning_rate": 0.00019956481620805012, "loss": 0.8604, "step": 14030 }, { "epoch": 0.24649309152197194, "grad_norm": 0.06275054616704875, "learning_rate": 0.00019956377186665066, "loss": 0.8476, "step": 14040 }, { "epoch": 0.24666865640197336, "grad_norm": 0.08469841729288999, "learning_rate": 0.00019956272627642927, "loss": 0.8508, "step": 14050 }, { "epoch": 0.24684422128197475, "grad_norm": 0.06782748577518789, "learning_rate": 0.00019956167943739926, "loss": 0.8629, "step": 14060 }, { "epoch": 0.24701978616197615, "grad_norm": 0.1257202632698329, "learning_rate": 0.0001995606313495739, "loss": 0.8518, "step": 14070 }, { "epoch": 0.24719535104197757, "grad_norm": 0.08676896461838673, "learning_rate": 0.00019955958201296642, "loss": 0.8543, "step": 14080 }, { "epoch": 0.24737091592197896, "grad_norm": 0.11054184338165894, "learning_rate": 0.00019955853142759018, "loss": 0.8539, "step": 14090 }, { "epoch": 0.24754648080198038, "grad_norm": 0.06297914852774192, "learning_rate": 0.00019955747959345845, "loss": 0.8617, "step": 14100 }, { "epoch": 0.24772204568198178, "grad_norm": 0.059152322453887136, "learning_rate": 0.00019955642651058457, "loss": 0.8619, "step": 14110 }, { "epoch": 0.24789761056198317, "grad_norm": 0.0758873849376312, "learning_rate": 0.00019955537217898186, "loss": 0.8521, "step": 14120 }, { "epoch": 0.2480731754419846, "grad_norm": 0.051644108807882866, "learning_rate": 0.00019955431659866372, "loss": 0.8574, "step": 14130 }, { "epoch": 0.248248740321986, "grad_norm": 0.06901620691290002, "learning_rate": 0.00019955325976964348, "loss": 0.8534, "step": 14140 }, { "epoch": 0.24842430520198738, "grad_norm": 0.06283408010309022, "learning_rate": 0.00019955220169193456, "loss": 0.8548, "step": 14150 }, { "epoch": 0.2485998700819888, "grad_norm": 0.09762046283936589, "learning_rate": 0.00019955114236555035, "loss": 0.854, "step": 14160 }, { "epoch": 0.2487754349619902, "grad_norm": 0.07580064981869453, "learning_rate": 0.0001995500817905043, "loss": 0.8611, "step": 14170 }, { "epoch": 0.24895099984199162, "grad_norm": 0.07540993018317861, "learning_rate": 0.0001995490199668098, "loss": 0.8566, "step": 14180 }, { "epoch": 0.24912656472199302, "grad_norm": 0.08381911954618285, "learning_rate": 0.00019954795689448033, "loss": 0.8656, "step": 14190 }, { "epoch": 0.2493021296019944, "grad_norm": 0.08774567498476624, "learning_rate": 0.0001995468925735294, "loss": 0.8635, "step": 14200 }, { "epoch": 0.24947769448199583, "grad_norm": 0.0707726016112555, "learning_rate": 0.0001995458270039704, "loss": 0.8467, "step": 14210 }, { "epoch": 0.24965325936199723, "grad_norm": 0.08976732565043521, "learning_rate": 0.00019954476018581695, "loss": 0.8575, "step": 14220 }, { "epoch": 0.24982882424199862, "grad_norm": 0.07250109380827384, "learning_rate": 0.00019954369211908245, "loss": 0.8583, "step": 14230 }, { "epoch": 0.250004389122, "grad_norm": 0.055044823812773445, "learning_rate": 0.00019954262280378045, "loss": 0.8598, "step": 14240 }, { "epoch": 0.25017995400200144, "grad_norm": 0.07227214383739743, "learning_rate": 0.0001995415522399246, "loss": 0.8591, "step": 14250 }, { "epoch": 0.25035551888200286, "grad_norm": 0.0838073762374175, "learning_rate": 0.00019954048042752838, "loss": 0.8681, "step": 14260 }, { "epoch": 0.2505310837620042, "grad_norm": 0.06698892016465113, "learning_rate": 0.00019953940736660542, "loss": 0.8545, "step": 14270 }, { "epoch": 0.25070664864200565, "grad_norm": 0.0752340954482605, "learning_rate": 0.00019953833305716922, "loss": 0.853, "step": 14280 }, { "epoch": 0.25088221352200707, "grad_norm": 0.08350924890195188, "learning_rate": 0.00019953725749923348, "loss": 0.8627, "step": 14290 }, { "epoch": 0.25105777840200844, "grad_norm": 0.08450552930801697, "learning_rate": 0.0001995361806928118, "loss": 0.8551, "step": 14300 }, { "epoch": 0.25123334328200986, "grad_norm": 0.056231253469683766, "learning_rate": 0.00019953510263791784, "loss": 0.8516, "step": 14310 }, { "epoch": 0.2514089081620113, "grad_norm": 0.08440568554315536, "learning_rate": 0.00019953402333456525, "loss": 0.8627, "step": 14320 }, { "epoch": 0.2515844730420127, "grad_norm": 0.06958882240933123, "learning_rate": 0.0001995329427827677, "loss": 0.8623, "step": 14330 }, { "epoch": 0.25176003792201407, "grad_norm": 0.08809798929105665, "learning_rate": 0.00019953186098253887, "loss": 0.8567, "step": 14340 }, { "epoch": 0.2519356028020155, "grad_norm": 0.06397476853674225, "learning_rate": 0.00019953077793389247, "loss": 0.8696, "step": 14350 }, { "epoch": 0.2521111676820169, "grad_norm": 0.06087596368285615, "learning_rate": 0.00019952969363684227, "loss": 0.8554, "step": 14360 }, { "epoch": 0.2522867325620183, "grad_norm": 0.09796475742085116, "learning_rate": 0.0001995286080914019, "loss": 0.8549, "step": 14370 }, { "epoch": 0.2524622974420197, "grad_norm": 0.09323122966284848, "learning_rate": 0.00019952752129758525, "loss": 0.8562, "step": 14380 }, { "epoch": 0.2526378623220211, "grad_norm": 0.08104986213499887, "learning_rate": 0.000199526433255406, "loss": 0.8678, "step": 14390 }, { "epoch": 0.2528134272020225, "grad_norm": 0.0823092086972603, "learning_rate": 0.00019952534396487794, "loss": 0.8643, "step": 14400 }, { "epoch": 0.2529889920820239, "grad_norm": 0.06541238804776654, "learning_rate": 0.0001995242534260149, "loss": 0.8691, "step": 14410 }, { "epoch": 0.25316455696202533, "grad_norm": 0.06811580378784352, "learning_rate": 0.0001995231616388307, "loss": 0.8599, "step": 14420 }, { "epoch": 0.2533401218420267, "grad_norm": 0.052965301341770216, "learning_rate": 0.00019952206860333915, "loss": 0.8525, "step": 14430 }, { "epoch": 0.2535156867220281, "grad_norm": 0.08451420049721323, "learning_rate": 0.00019952097431955411, "loss": 0.8503, "step": 14440 }, { "epoch": 0.25369125160202954, "grad_norm": 0.10248483474458604, "learning_rate": 0.00019951987878748945, "loss": 0.862, "step": 14450 }, { "epoch": 0.2538668164820309, "grad_norm": 0.10038642706372371, "learning_rate": 0.00019951878200715904, "loss": 0.8652, "step": 14460 }, { "epoch": 0.25404238136203233, "grad_norm": 0.08031892442777946, "learning_rate": 0.00019951768397857676, "loss": 0.868, "step": 14470 }, { "epoch": 0.25421794624203375, "grad_norm": 0.09205453785237903, "learning_rate": 0.00019951658470175655, "loss": 0.8555, "step": 14480 }, { "epoch": 0.2543935111220351, "grad_norm": 0.060170175291020624, "learning_rate": 0.00019951548417671236, "loss": 0.8531, "step": 14490 }, { "epoch": 0.25456907600203654, "grad_norm": 0.08382637825923747, "learning_rate": 0.00019951438240345808, "loss": 0.8618, "step": 14500 }, { "epoch": 0.25474464088203796, "grad_norm": 0.0695712522637437, "learning_rate": 0.0001995132793820077, "loss": 0.8562, "step": 14510 }, { "epoch": 0.2549202057620394, "grad_norm": 0.0654489707017867, "learning_rate": 0.0001995121751123752, "loss": 0.8581, "step": 14520 }, { "epoch": 0.25509577064204075, "grad_norm": 0.068870240021273, "learning_rate": 0.00019951106959457455, "loss": 0.8598, "step": 14530 }, { "epoch": 0.2552713355220422, "grad_norm": 0.089192001381446, "learning_rate": 0.0001995099628286198, "loss": 0.8593, "step": 14540 }, { "epoch": 0.2554469004020436, "grad_norm": 0.07295650383382928, "learning_rate": 0.00019950885481452487, "loss": 0.8613, "step": 14550 }, { "epoch": 0.25562246528204496, "grad_norm": 0.08360592222962818, "learning_rate": 0.00019950774555230395, "loss": 0.8548, "step": 14560 }, { "epoch": 0.2557980301620464, "grad_norm": 0.08391169673849941, "learning_rate": 0.00019950663504197095, "loss": 0.8606, "step": 14570 }, { "epoch": 0.2559735950420478, "grad_norm": 0.10918252013656823, "learning_rate": 0.00019950552328354003, "loss": 0.8544, "step": 14580 }, { "epoch": 0.2561491599220492, "grad_norm": 0.08638902306163822, "learning_rate": 0.00019950441027702528, "loss": 0.856, "step": 14590 }, { "epoch": 0.2563247248020506, "grad_norm": 0.08630070903540663, "learning_rate": 0.00019950329602244076, "loss": 0.8591, "step": 14600 }, { "epoch": 0.256500289682052, "grad_norm": 0.08364492262500373, "learning_rate": 0.00019950218051980058, "loss": 0.8617, "step": 14610 }, { "epoch": 0.2566758545620534, "grad_norm": 0.05698496216139443, "learning_rate": 0.0001995010637691189, "loss": 0.8552, "step": 14620 }, { "epoch": 0.2568514194420548, "grad_norm": 0.06101855631838769, "learning_rate": 0.00019949994577040986, "loss": 0.8611, "step": 14630 }, { "epoch": 0.25702698432205623, "grad_norm": 0.06442491170136191, "learning_rate": 0.00019949882652368765, "loss": 0.8584, "step": 14640 }, { "epoch": 0.2572025492020576, "grad_norm": 0.0769865238760441, "learning_rate": 0.00019949770602896641, "loss": 0.8613, "step": 14650 }, { "epoch": 0.257378114082059, "grad_norm": 0.07344353739916515, "learning_rate": 0.00019949658428626037, "loss": 0.8579, "step": 14660 }, { "epoch": 0.25755367896206044, "grad_norm": 0.0674799196446192, "learning_rate": 0.0001994954612955837, "loss": 0.8661, "step": 14670 }, { "epoch": 0.25772924384206186, "grad_norm": 0.05989849292668042, "learning_rate": 0.00019949433705695072, "loss": 0.855, "step": 14680 }, { "epoch": 0.25790480872206323, "grad_norm": 0.08201718793868297, "learning_rate": 0.00019949321157037558, "loss": 0.8547, "step": 14690 }, { "epoch": 0.25808037360206465, "grad_norm": 0.10871927434689527, "learning_rate": 0.00019949208483587257, "loss": 0.8671, "step": 14700 }, { "epoch": 0.25825593848206607, "grad_norm": 0.07312195730501901, "learning_rate": 0.00019949095685345598, "loss": 0.8591, "step": 14710 }, { "epoch": 0.25843150336206744, "grad_norm": 0.06476372827452581, "learning_rate": 0.0001994898276231401, "loss": 0.8596, "step": 14720 }, { "epoch": 0.25860706824206886, "grad_norm": 0.06798667236769548, "learning_rate": 0.0001994886971449392, "loss": 0.8637, "step": 14730 }, { "epoch": 0.2587826331220703, "grad_norm": 0.064407349275021, "learning_rate": 0.00019948756541886766, "loss": 0.8557, "step": 14740 }, { "epoch": 0.25895819800207165, "grad_norm": 0.1185406828061444, "learning_rate": 0.00019948643244493977, "loss": 0.8576, "step": 14750 }, { "epoch": 0.25913376288207307, "grad_norm": 0.09598548925354039, "learning_rate": 0.0001994852982231699, "loss": 0.8503, "step": 14760 }, { "epoch": 0.2593093277620745, "grad_norm": 0.05763865742883013, "learning_rate": 0.00019948416275357244, "loss": 0.8626, "step": 14770 }, { "epoch": 0.25948489264207586, "grad_norm": 0.07027125494343812, "learning_rate": 0.00019948302603616175, "loss": 0.8672, "step": 14780 }, { "epoch": 0.2596604575220773, "grad_norm": 0.06563743906802047, "learning_rate": 0.00019948188807095225, "loss": 0.8627, "step": 14790 }, { "epoch": 0.2598360224020787, "grad_norm": 0.06751699640995153, "learning_rate": 0.00019948074885795834, "loss": 0.8561, "step": 14800 }, { "epoch": 0.26001158728208007, "grad_norm": 0.06416503390544841, "learning_rate": 0.00019947960839719454, "loss": 0.857, "step": 14810 }, { "epoch": 0.2601871521620815, "grad_norm": 0.10598730482068526, "learning_rate": 0.00019947846668867515, "loss": 0.8628, "step": 14820 }, { "epoch": 0.2603627170420829, "grad_norm": 0.08307887657328619, "learning_rate": 0.00019947732373241475, "loss": 0.8635, "step": 14830 }, { "epoch": 0.2605382819220843, "grad_norm": 0.07348607297169166, "learning_rate": 0.00019947617952842777, "loss": 0.8604, "step": 14840 }, { "epoch": 0.2607138468020857, "grad_norm": 0.07471403751065663, "learning_rate": 0.00019947503407672873, "loss": 0.8563, "step": 14850 }, { "epoch": 0.2608894116820871, "grad_norm": 0.0653038817964391, "learning_rate": 0.00019947388737733216, "loss": 0.8604, "step": 14860 }, { "epoch": 0.26106497656208855, "grad_norm": 0.08734429902691533, "learning_rate": 0.0001994727394302525, "loss": 0.8601, "step": 14870 }, { "epoch": 0.2612405414420899, "grad_norm": 0.09714174951971054, "learning_rate": 0.0001994715902355044, "loss": 0.8602, "step": 14880 }, { "epoch": 0.26141610632209134, "grad_norm": 0.05213582636621975, "learning_rate": 0.0001994704397931024, "loss": 0.8603, "step": 14890 }, { "epoch": 0.26159167120209276, "grad_norm": 0.09453060688591909, "learning_rate": 0.00019946928810306104, "loss": 0.8575, "step": 14900 }, { "epoch": 0.2617672360820941, "grad_norm": 0.06696302555144164, "learning_rate": 0.00019946813516539494, "loss": 0.8515, "step": 14910 }, { "epoch": 0.26194280096209555, "grad_norm": 0.07007177645375523, "learning_rate": 0.00019946698098011867, "loss": 0.8558, "step": 14920 }, { "epoch": 0.26211836584209697, "grad_norm": 0.07095748350563354, "learning_rate": 0.0001994658255472469, "loss": 0.8573, "step": 14930 }, { "epoch": 0.26229393072209833, "grad_norm": 0.08723730767261309, "learning_rate": 0.00019946466886679423, "loss": 0.8576, "step": 14940 }, { "epoch": 0.26246949560209976, "grad_norm": 0.06257371483317639, "learning_rate": 0.00019946351093877532, "loss": 0.8601, "step": 14950 }, { "epoch": 0.2626450604821012, "grad_norm": 0.07005616301223214, "learning_rate": 0.0001994623517632049, "loss": 0.8617, "step": 14960 }, { "epoch": 0.26282062536210254, "grad_norm": 0.10495167338404755, "learning_rate": 0.00019946119134009758, "loss": 0.8626, "step": 14970 }, { "epoch": 0.26299619024210397, "grad_norm": 0.07526928806416286, "learning_rate": 0.0001994600296694681, "loss": 0.8576, "step": 14980 }, { "epoch": 0.2631717551221054, "grad_norm": 0.09613534910303849, "learning_rate": 0.00019945886675133118, "loss": 0.8664, "step": 14990 }, { "epoch": 0.26334732000210676, "grad_norm": 0.07195775617282611, "learning_rate": 0.00019945770258570156, "loss": 0.8672, "step": 15000 }, { "epoch": 0.2635228848821082, "grad_norm": 0.0792644418192245, "learning_rate": 0.00019945653717259399, "loss": 0.8518, "step": 15010 }, { "epoch": 0.2636984497621096, "grad_norm": 0.08147981421247041, "learning_rate": 0.00019945537051202318, "loss": 0.8598, "step": 15020 }, { "epoch": 0.26387401464211097, "grad_norm": 0.06340849234358589, "learning_rate": 0.000199454202604004, "loss": 0.856, "step": 15030 }, { "epoch": 0.2640495795221124, "grad_norm": 0.04734024661268947, "learning_rate": 0.0001994530334485512, "loss": 0.861, "step": 15040 }, { "epoch": 0.2642251444021138, "grad_norm": 0.05809592723200135, "learning_rate": 0.00019945186304567956, "loss": 0.8604, "step": 15050 }, { "epoch": 0.26440070928211523, "grad_norm": 0.0760507148983711, "learning_rate": 0.00019945069139540398, "loss": 0.8486, "step": 15060 }, { "epoch": 0.2645762741621166, "grad_norm": 0.0809802331922669, "learning_rate": 0.00019944951849773924, "loss": 0.8625, "step": 15070 }, { "epoch": 0.264751839042118, "grad_norm": 0.0688588726644685, "learning_rate": 0.00019944834435270026, "loss": 0.8592, "step": 15080 }, { "epoch": 0.26492740392211944, "grad_norm": 0.08616665668888285, "learning_rate": 0.0001994471689603019, "loss": 0.8584, "step": 15090 }, { "epoch": 0.2651029688021208, "grad_norm": 0.08110949314089932, "learning_rate": 0.00019944599232055903, "loss": 0.8567, "step": 15100 }, { "epoch": 0.26527853368212223, "grad_norm": 0.07019644854833353, "learning_rate": 0.00019944481443348656, "loss": 0.857, "step": 15110 }, { "epoch": 0.26545409856212365, "grad_norm": 0.07357093181321922, "learning_rate": 0.00019944363529909943, "loss": 0.86, "step": 15120 }, { "epoch": 0.265629663442125, "grad_norm": 0.0817515902034547, "learning_rate": 0.00019944245491741258, "loss": 0.8679, "step": 15130 }, { "epoch": 0.26580522832212644, "grad_norm": 0.061286659231383246, "learning_rate": 0.00019944127328844097, "loss": 0.8588, "step": 15140 }, { "epoch": 0.26598079320212786, "grad_norm": 0.059173874132725804, "learning_rate": 0.00019944009041219954, "loss": 0.8604, "step": 15150 }, { "epoch": 0.26615635808212923, "grad_norm": 0.05197657774103479, "learning_rate": 0.0001994389062887033, "loss": 0.8554, "step": 15160 }, { "epoch": 0.26633192296213065, "grad_norm": 0.06808451894132131, "learning_rate": 0.00019943772091796726, "loss": 0.8608, "step": 15170 }, { "epoch": 0.2665074878421321, "grad_norm": 0.07047614635428266, "learning_rate": 0.0001994365343000064, "loss": 0.8555, "step": 15180 }, { "epoch": 0.26668305272213344, "grad_norm": 0.10508749893678086, "learning_rate": 0.00019943534643483585, "loss": 0.8618, "step": 15190 }, { "epoch": 0.26685861760213486, "grad_norm": 0.07136833586693185, "learning_rate": 0.00019943415732247055, "loss": 0.8591, "step": 15200 }, { "epoch": 0.2670341824821363, "grad_norm": 0.09983142544668047, "learning_rate": 0.00019943296696292562, "loss": 0.8585, "step": 15210 }, { "epoch": 0.26720974736213765, "grad_norm": 0.07251035341402826, "learning_rate": 0.00019943177535621613, "loss": 0.8621, "step": 15220 }, { "epoch": 0.2673853122421391, "grad_norm": 0.08439254746939306, "learning_rate": 0.00019943058250235717, "loss": 0.8585, "step": 15230 }, { "epoch": 0.2675608771221405, "grad_norm": 0.07012011605321045, "learning_rate": 0.00019942938840136388, "loss": 0.8533, "step": 15240 }, { "epoch": 0.2677364420021419, "grad_norm": 0.06644274019649771, "learning_rate": 0.00019942819305325137, "loss": 0.8554, "step": 15250 }, { "epoch": 0.2679120068821433, "grad_norm": 0.05553434813813748, "learning_rate": 0.00019942699645803482, "loss": 0.8567, "step": 15260 }, { "epoch": 0.2680875717621447, "grad_norm": 0.0615237408809731, "learning_rate": 0.00019942579861572933, "loss": 0.8576, "step": 15270 }, { "epoch": 0.2682631366421461, "grad_norm": 0.06356950031888757, "learning_rate": 0.0001994245995263501, "loss": 0.855, "step": 15280 }, { "epoch": 0.2684387015221475, "grad_norm": 0.05983926636846021, "learning_rate": 0.00019942339918991239, "loss": 0.8617, "step": 15290 }, { "epoch": 0.2686142664021489, "grad_norm": 0.07037096956391803, "learning_rate": 0.0001994221976064313, "loss": 0.8588, "step": 15300 }, { "epoch": 0.26878983128215034, "grad_norm": 0.06530545571099387, "learning_rate": 0.0001994209947759221, "loss": 0.8593, "step": 15310 }, { "epoch": 0.2689653961621517, "grad_norm": 0.059316112291684, "learning_rate": 0.00019941979069840003, "loss": 0.8662, "step": 15320 }, { "epoch": 0.2691409610421531, "grad_norm": 0.05477020282891137, "learning_rate": 0.00019941858537388037, "loss": 0.8613, "step": 15330 }, { "epoch": 0.26931652592215455, "grad_norm": 0.07017035524547499, "learning_rate": 0.00019941737880237837, "loss": 0.8565, "step": 15340 }, { "epoch": 0.2694920908021559, "grad_norm": 0.08967510436016346, "learning_rate": 0.00019941617098390933, "loss": 0.8558, "step": 15350 }, { "epoch": 0.26966765568215734, "grad_norm": 0.07798973540389004, "learning_rate": 0.00019941496191848852, "loss": 0.858, "step": 15360 }, { "epoch": 0.26984322056215876, "grad_norm": 0.06308067321717561, "learning_rate": 0.00019941375160613127, "loss": 0.8579, "step": 15370 }, { "epoch": 0.2700187854421601, "grad_norm": 0.08192827114395146, "learning_rate": 0.00019941254004685293, "loss": 0.8614, "step": 15380 }, { "epoch": 0.27019435032216155, "grad_norm": 0.05796423307785897, "learning_rate": 0.0001994113272406689, "loss": 0.8588, "step": 15390 }, { "epoch": 0.27036991520216297, "grad_norm": 0.061589070551759294, "learning_rate": 0.00019941011318759446, "loss": 0.8695, "step": 15400 }, { "epoch": 0.2705454800821644, "grad_norm": 0.07508114376145081, "learning_rate": 0.000199408897887645, "loss": 0.8664, "step": 15410 }, { "epoch": 0.27072104496216576, "grad_norm": 0.05938097155689713, "learning_rate": 0.00019940768134083596, "loss": 0.8591, "step": 15420 }, { "epoch": 0.2708966098421672, "grad_norm": 0.060566614201024854, "learning_rate": 0.00019940646354718274, "loss": 0.8593, "step": 15430 }, { "epoch": 0.2710721747221686, "grad_norm": 0.06092729151042539, "learning_rate": 0.00019940524450670077, "loss": 0.86, "step": 15440 }, { "epoch": 0.27124773960216997, "grad_norm": 0.09361074441279993, "learning_rate": 0.0001994040242194055, "loss": 0.8575, "step": 15450 }, { "epoch": 0.2714233044821714, "grad_norm": 0.08385137239710018, "learning_rate": 0.0001994028026853124, "loss": 0.8603, "step": 15460 }, { "epoch": 0.2715988693621728, "grad_norm": 0.06041815489567256, "learning_rate": 0.00019940157990443687, "loss": 0.8659, "step": 15470 }, { "epoch": 0.2717744342421742, "grad_norm": 0.05850804898985812, "learning_rate": 0.0001994003558767945, "loss": 0.8491, "step": 15480 }, { "epoch": 0.2719499991221756, "grad_norm": 0.06034553249868876, "learning_rate": 0.00019939913060240078, "loss": 0.873, "step": 15490 }, { "epoch": 0.272125564002177, "grad_norm": 0.06293535078141517, "learning_rate": 0.00019939790408127122, "loss": 0.8595, "step": 15500 }, { "epoch": 0.2723011288821784, "grad_norm": 0.0811121688892563, "learning_rate": 0.00019939667631342135, "loss": 0.8642, "step": 15510 }, { "epoch": 0.2724766937621798, "grad_norm": 0.06196933978325333, "learning_rate": 0.0001993954472988667, "loss": 0.858, "step": 15520 }, { "epoch": 0.27265225864218123, "grad_norm": 0.08701626229158647, "learning_rate": 0.00019939421703762288, "loss": 0.8583, "step": 15530 }, { "epoch": 0.2728278235221826, "grad_norm": 0.0763265601821762, "learning_rate": 0.0001993929855297055, "loss": 0.8569, "step": 15540 }, { "epoch": 0.273003388402184, "grad_norm": 0.09040556133898896, "learning_rate": 0.00019939175277513014, "loss": 0.8571, "step": 15550 }, { "epoch": 0.27317895328218544, "grad_norm": 0.10787783472187551, "learning_rate": 0.0001993905187739124, "loss": 0.8605, "step": 15560 }, { "epoch": 0.2733545181621868, "grad_norm": 0.05170503509826312, "learning_rate": 0.00019938928352606795, "loss": 0.8618, "step": 15570 }, { "epoch": 0.27353008304218823, "grad_norm": 0.0863280097679521, "learning_rate": 0.00019938804703161238, "loss": 0.8552, "step": 15580 }, { "epoch": 0.27370564792218965, "grad_norm": 0.0691094489284862, "learning_rate": 0.00019938680929056145, "loss": 0.8576, "step": 15590 }, { "epoch": 0.2738812128021911, "grad_norm": 0.063285310270822, "learning_rate": 0.00019938557030293075, "loss": 0.8642, "step": 15600 }, { "epoch": 0.27405677768219244, "grad_norm": 0.06332654858749048, "learning_rate": 0.00019938433006873603, "loss": 0.8619, "step": 15610 }, { "epoch": 0.27423234256219386, "grad_norm": 0.07587451469937638, "learning_rate": 0.00019938308858799298, "loss": 0.8627, "step": 15620 }, { "epoch": 0.2744079074421953, "grad_norm": 0.05670961516455953, "learning_rate": 0.00019938184586071737, "loss": 0.8561, "step": 15630 }, { "epoch": 0.27458347232219665, "grad_norm": 0.09779480659004015, "learning_rate": 0.0001993806018869249, "loss": 0.8614, "step": 15640 }, { "epoch": 0.2747590372021981, "grad_norm": 0.05622871037263839, "learning_rate": 0.00019937935666663133, "loss": 0.8564, "step": 15650 }, { "epoch": 0.2749346020821995, "grad_norm": 0.06997749513049606, "learning_rate": 0.00019937811019985247, "loss": 0.8535, "step": 15660 }, { "epoch": 0.27511016696220086, "grad_norm": 0.08100381783597384, "learning_rate": 0.00019937686248660413, "loss": 0.8561, "step": 15670 }, { "epoch": 0.2752857318422023, "grad_norm": 0.07749711760124976, "learning_rate": 0.0001993756135269021, "loss": 0.8556, "step": 15680 }, { "epoch": 0.2754612967222037, "grad_norm": 0.056564541601300394, "learning_rate": 0.00019937436332076217, "loss": 0.8585, "step": 15690 }, { "epoch": 0.2756368616022051, "grad_norm": 0.06651248399908018, "learning_rate": 0.00019937311186820017, "loss": 0.8625, "step": 15700 }, { "epoch": 0.2758124264822065, "grad_norm": 0.08598846150848699, "learning_rate": 0.00019937185916923201, "loss": 0.8623, "step": 15710 }, { "epoch": 0.2759879913622079, "grad_norm": 0.07862610369990877, "learning_rate": 0.00019937060522387358, "loss": 0.8566, "step": 15720 }, { "epoch": 0.2761635562422093, "grad_norm": 0.06960192857765705, "learning_rate": 0.0001993693500321407, "loss": 0.8614, "step": 15730 }, { "epoch": 0.2763391211222107, "grad_norm": 0.06732708537020778, "learning_rate": 0.00019936809359404926, "loss": 0.8585, "step": 15740 }, { "epoch": 0.27651468600221213, "grad_norm": 0.07534180119172672, "learning_rate": 0.00019936683590961526, "loss": 0.8596, "step": 15750 }, { "epoch": 0.2766902508822135, "grad_norm": 0.07088513712059127, "learning_rate": 0.00019936557697885463, "loss": 0.8656, "step": 15760 }, { "epoch": 0.2768658157622149, "grad_norm": 0.06972115601487668, "learning_rate": 0.00019936431680178324, "loss": 0.866, "step": 15770 }, { "epoch": 0.27704138064221634, "grad_norm": 0.07324793150015474, "learning_rate": 0.00019936305537841714, "loss": 0.8625, "step": 15780 }, { "epoch": 0.27721694552221776, "grad_norm": 0.07253579492218197, "learning_rate": 0.00019936179270877222, "loss": 0.8601, "step": 15790 }, { "epoch": 0.27739251040221913, "grad_norm": 0.0764964208725961, "learning_rate": 0.00019936052879286457, "loss": 0.8595, "step": 15800 }, { "epoch": 0.27756807528222055, "grad_norm": 0.06281302842248587, "learning_rate": 0.0001993592636307102, "loss": 0.8578, "step": 15810 }, { "epoch": 0.27774364016222197, "grad_norm": 0.07124531523815639, "learning_rate": 0.00019935799722232508, "loss": 0.8581, "step": 15820 }, { "epoch": 0.27791920504222334, "grad_norm": 0.07789560430737721, "learning_rate": 0.00019935672956772528, "loss": 0.8679, "step": 15830 }, { "epoch": 0.27809476992222476, "grad_norm": 0.07194036810212937, "learning_rate": 0.00019935546066692688, "loss": 0.8584, "step": 15840 }, { "epoch": 0.2782703348022262, "grad_norm": 0.06334317552377043, "learning_rate": 0.00019935419051994596, "loss": 0.8579, "step": 15850 }, { "epoch": 0.27844589968222755, "grad_norm": 0.07865531965696003, "learning_rate": 0.00019935291912679854, "loss": 0.8501, "step": 15860 }, { "epoch": 0.27862146456222897, "grad_norm": 0.08826749624793569, "learning_rate": 0.00019935164648750083, "loss": 0.8595, "step": 15870 }, { "epoch": 0.2787970294422304, "grad_norm": 0.06218307312058364, "learning_rate": 0.00019935037260206893, "loss": 0.8514, "step": 15880 }, { "epoch": 0.27897259432223176, "grad_norm": 0.07610431204157117, "learning_rate": 0.0001993490974705189, "loss": 0.8627, "step": 15890 }, { "epoch": 0.2791481592022332, "grad_norm": 0.0729606425154137, "learning_rate": 0.00019934782109286698, "loss": 0.8524, "step": 15900 }, { "epoch": 0.2793237240822346, "grad_norm": 0.06563619915488823, "learning_rate": 0.00019934654346912933, "loss": 0.8568, "step": 15910 }, { "epoch": 0.27949928896223597, "grad_norm": 0.0733487232391486, "learning_rate": 0.00019934526459932216, "loss": 0.8575, "step": 15920 }, { "epoch": 0.2796748538422374, "grad_norm": 0.07966694020584535, "learning_rate": 0.0001993439844834616, "loss": 0.8587, "step": 15930 }, { "epoch": 0.2798504187222388, "grad_norm": 0.08177977120850058, "learning_rate": 0.00019934270312156388, "loss": 0.8643, "step": 15940 }, { "epoch": 0.2800259836022402, "grad_norm": 0.07766388085649538, "learning_rate": 0.0001993414205136453, "loss": 0.8665, "step": 15950 }, { "epoch": 0.2802015484822416, "grad_norm": 0.09774588948139021, "learning_rate": 0.00019934013665972203, "loss": 0.8571, "step": 15960 }, { "epoch": 0.280377113362243, "grad_norm": 0.08825931357494128, "learning_rate": 0.00019933885155981045, "loss": 0.8643, "step": 15970 }, { "epoch": 0.28055267824224445, "grad_norm": 0.060966301096905894, "learning_rate": 0.00019933756521392675, "loss": 0.8532, "step": 15980 }, { "epoch": 0.2807282431222458, "grad_norm": 0.05961521169022152, "learning_rate": 0.00019933627762208723, "loss": 0.861, "step": 15990 }, { "epoch": 0.28090380800224724, "grad_norm": 0.06176946856910091, "learning_rate": 0.00019933498878430824, "loss": 0.8616, "step": 16000 }, { "epoch": 0.28107937288224866, "grad_norm": 0.07452369399288046, "learning_rate": 0.00019933369870060613, "loss": 0.8587, "step": 16010 }, { "epoch": 0.28125493776225, "grad_norm": 0.07487060103204875, "learning_rate": 0.0001993324073709972, "loss": 0.8659, "step": 16020 }, { "epoch": 0.28143050264225145, "grad_norm": 0.08865705455486707, "learning_rate": 0.00019933111479549778, "loss": 0.8575, "step": 16030 }, { "epoch": 0.28160606752225287, "grad_norm": 0.06922714460907559, "learning_rate": 0.00019932982097412428, "loss": 0.8653, "step": 16040 }, { "epoch": 0.28178163240225423, "grad_norm": 0.05603644345233025, "learning_rate": 0.00019932852590689317, "loss": 0.8525, "step": 16050 }, { "epoch": 0.28195719728225566, "grad_norm": 0.08778585024463534, "learning_rate": 0.00019932722959382077, "loss": 0.8585, "step": 16060 }, { "epoch": 0.2821327621622571, "grad_norm": 0.07017282190283822, "learning_rate": 0.0001993259320349235, "loss": 0.8588, "step": 16070 }, { "epoch": 0.28230832704225844, "grad_norm": 0.09574002950131953, "learning_rate": 0.00019932463323021787, "loss": 0.8645, "step": 16080 }, { "epoch": 0.28248389192225987, "grad_norm": 0.08459664422323991, "learning_rate": 0.00019932333317972028, "loss": 0.8596, "step": 16090 }, { "epoch": 0.2826594568022613, "grad_norm": 0.05714972287323693, "learning_rate": 0.00019932203188344718, "loss": 0.859, "step": 16100 }, { "epoch": 0.28283502168226266, "grad_norm": 0.055828747395216934, "learning_rate": 0.00019932072934141513, "loss": 0.8623, "step": 16110 }, { "epoch": 0.2830105865622641, "grad_norm": 0.107651825727822, "learning_rate": 0.00019931942555364055, "loss": 0.8573, "step": 16120 }, { "epoch": 0.2831861514422655, "grad_norm": 0.0799038212972373, "learning_rate": 0.00019931812052014004, "loss": 0.8524, "step": 16130 }, { "epoch": 0.2833617163222669, "grad_norm": 0.04896050515286679, "learning_rate": 0.0001993168142409301, "loss": 0.8573, "step": 16140 }, { "epoch": 0.2835372812022683, "grad_norm": 0.07057822790250297, "learning_rate": 0.00019931550671602733, "loss": 0.8576, "step": 16150 }, { "epoch": 0.2837128460822697, "grad_norm": 0.07031964074150342, "learning_rate": 0.0001993141979454482, "loss": 0.8584, "step": 16160 }, { "epoch": 0.28388841096227113, "grad_norm": 0.10147315193914687, "learning_rate": 0.0001993128879292093, "loss": 0.8567, "step": 16170 }, { "epoch": 0.2840639758422725, "grad_norm": 0.07753844648971414, "learning_rate": 0.0001993115766673273, "loss": 0.8574, "step": 16180 }, { "epoch": 0.2842395407222739, "grad_norm": 0.06569563339503433, "learning_rate": 0.0001993102641598188, "loss": 0.8591, "step": 16190 }, { "epoch": 0.28441510560227534, "grad_norm": 0.06536824680205941, "learning_rate": 0.0001993089504067004, "loss": 0.8569, "step": 16200 }, { "epoch": 0.2845906704822767, "grad_norm": 0.05978093871844364, "learning_rate": 0.00019930763540798872, "loss": 0.8647, "step": 16210 }, { "epoch": 0.28476623536227813, "grad_norm": 0.05891315416868927, "learning_rate": 0.00019930631916370052, "loss": 0.855, "step": 16220 }, { "epoch": 0.28494180024227955, "grad_norm": 0.08247141480163506, "learning_rate": 0.00019930500167385243, "loss": 0.87, "step": 16230 }, { "epoch": 0.2851173651222809, "grad_norm": 0.0738842759669392, "learning_rate": 0.00019930368293846106, "loss": 0.8497, "step": 16240 }, { "epoch": 0.28529293000228234, "grad_norm": 0.06795391646433357, "learning_rate": 0.00019930236295754322, "loss": 0.8611, "step": 16250 }, { "epoch": 0.28546849488228376, "grad_norm": 0.06347159406758775, "learning_rate": 0.00019930104173111561, "loss": 0.8612, "step": 16260 }, { "epoch": 0.28564405976228513, "grad_norm": 0.0672827112361949, "learning_rate": 0.00019929971925919496, "loss": 0.8609, "step": 16270 }, { "epoch": 0.28581962464228655, "grad_norm": 0.07063282346389584, "learning_rate": 0.00019929839554179803, "loss": 0.8665, "step": 16280 }, { "epoch": 0.285995189522288, "grad_norm": 0.058121357999863066, "learning_rate": 0.00019929707057894154, "loss": 0.8535, "step": 16290 }, { "epoch": 0.28617075440228934, "grad_norm": 0.062092745301839745, "learning_rate": 0.00019929574437064238, "loss": 0.8622, "step": 16300 }, { "epoch": 0.28634631928229076, "grad_norm": 0.08651549075544836, "learning_rate": 0.00019929441691691728, "loss": 0.8499, "step": 16310 }, { "epoch": 0.2865218841622922, "grad_norm": 0.06277721894575615, "learning_rate": 0.0001992930882177831, "loss": 0.8589, "step": 16320 }, { "epoch": 0.2866974490422936, "grad_norm": 0.07148467120649954, "learning_rate": 0.00019929175827325664, "loss": 0.8584, "step": 16330 }, { "epoch": 0.286873013922295, "grad_norm": 0.06975523540808198, "learning_rate": 0.00019929042708335476, "loss": 0.8615, "step": 16340 }, { "epoch": 0.2870485788022964, "grad_norm": 0.07329949169858321, "learning_rate": 0.00019928909464809436, "loss": 0.8596, "step": 16350 }, { "epoch": 0.2872241436822978, "grad_norm": 0.07214151524210616, "learning_rate": 0.00019928776096749226, "loss": 0.8644, "step": 16360 }, { "epoch": 0.2873997085622992, "grad_norm": 0.06071910447017084, "learning_rate": 0.00019928642604156538, "loss": 0.8569, "step": 16370 }, { "epoch": 0.2875752734423006, "grad_norm": 0.08380390160438374, "learning_rate": 0.0001992850898703307, "loss": 0.8589, "step": 16380 }, { "epoch": 0.287750838322302, "grad_norm": 0.06769896118293908, "learning_rate": 0.00019928375245380507, "loss": 0.852, "step": 16390 }, { "epoch": 0.2879264032023034, "grad_norm": 0.06060509382591061, "learning_rate": 0.00019928241379200544, "loss": 0.8546, "step": 16400 }, { "epoch": 0.2881019680823048, "grad_norm": 0.11592813564545848, "learning_rate": 0.00019928107388494882, "loss": 0.8555, "step": 16410 }, { "epoch": 0.28827753296230624, "grad_norm": 0.06779115149536825, "learning_rate": 0.00019927973273265213, "loss": 0.8568, "step": 16420 }, { "epoch": 0.2884530978423076, "grad_norm": 0.06632098100976402, "learning_rate": 0.0001992783903351324, "loss": 0.8563, "step": 16430 }, { "epoch": 0.288628662722309, "grad_norm": 0.06046294947863675, "learning_rate": 0.00019927704669240665, "loss": 0.8562, "step": 16440 }, { "epoch": 0.28880422760231045, "grad_norm": 0.06413339569445198, "learning_rate": 0.00019927570180449186, "loss": 0.8667, "step": 16450 }, { "epoch": 0.2889797924823118, "grad_norm": 0.06722794680718401, "learning_rate": 0.0001992743556714051, "loss": 0.8583, "step": 16460 }, { "epoch": 0.28915535736231324, "grad_norm": 0.06132257937348581, "learning_rate": 0.00019927300829316344, "loss": 0.8631, "step": 16470 }, { "epoch": 0.28933092224231466, "grad_norm": 0.07916561129219288, "learning_rate": 0.0001992716596697839, "loss": 0.8673, "step": 16480 }, { "epoch": 0.289506487122316, "grad_norm": 0.06322339176536311, "learning_rate": 0.0001992703098012836, "loss": 0.8655, "step": 16490 }, { "epoch": 0.28968205200231745, "grad_norm": 0.06698421710085138, "learning_rate": 0.00019926895868767967, "loss": 0.8627, "step": 16500 }, { "epoch": 0.28985761688231887, "grad_norm": 0.08615878226914123, "learning_rate": 0.00019926760632898917, "loss": 0.8657, "step": 16510 }, { "epoch": 0.2900331817623203, "grad_norm": 0.059103537065474984, "learning_rate": 0.0001992662527252293, "loss": 0.8662, "step": 16520 }, { "epoch": 0.29020874664232166, "grad_norm": 0.0647505348110548, "learning_rate": 0.00019926489787641715, "loss": 0.8605, "step": 16530 }, { "epoch": 0.2903843115223231, "grad_norm": 0.0659775997450497, "learning_rate": 0.0001992635417825699, "loss": 0.8534, "step": 16540 }, { "epoch": 0.2905598764023245, "grad_norm": 0.06431457970931055, "learning_rate": 0.00019926218444370477, "loss": 0.8607, "step": 16550 }, { "epoch": 0.29073544128232587, "grad_norm": 0.06582377814753178, "learning_rate": 0.00019926082585983892, "loss": 0.8652, "step": 16560 }, { "epoch": 0.2909110061623273, "grad_norm": 0.09443996030285609, "learning_rate": 0.0001992594660309896, "loss": 0.8589, "step": 16570 }, { "epoch": 0.2910865710423287, "grad_norm": 0.04913185577601539, "learning_rate": 0.00019925810495717396, "loss": 0.8622, "step": 16580 }, { "epoch": 0.2912621359223301, "grad_norm": 0.09867460473950097, "learning_rate": 0.00019925674263840932, "loss": 0.8544, "step": 16590 }, { "epoch": 0.2914377008023315, "grad_norm": 0.0577415072302105, "learning_rate": 0.00019925537907471293, "loss": 0.8579, "step": 16600 }, { "epoch": 0.2916132656823329, "grad_norm": 0.07736453174061995, "learning_rate": 0.00019925401426610205, "loss": 0.8627, "step": 16610 }, { "epoch": 0.2917888305623343, "grad_norm": 0.07465201078768827, "learning_rate": 0.000199252648212594, "loss": 0.8567, "step": 16620 }, { "epoch": 0.2919643954423357, "grad_norm": 0.07763032841320909, "learning_rate": 0.00019925128091420607, "loss": 0.8588, "step": 16630 }, { "epoch": 0.29213996032233713, "grad_norm": 0.08377313427211736, "learning_rate": 0.00019924991237095554, "loss": 0.8637, "step": 16640 }, { "epoch": 0.2923155252023385, "grad_norm": 0.06547195441916155, "learning_rate": 0.0001992485425828598, "loss": 0.8529, "step": 16650 }, { "epoch": 0.2924910900823399, "grad_norm": 0.06699176033960474, "learning_rate": 0.0001992471715499362, "loss": 0.8572, "step": 16660 }, { "epoch": 0.29266665496234134, "grad_norm": 0.06947559305268568, "learning_rate": 0.00019924579927220213, "loss": 0.8589, "step": 16670 }, { "epoch": 0.2928422198423427, "grad_norm": 0.06478367052411628, "learning_rate": 0.00019924442574967491, "loss": 0.8602, "step": 16680 }, { "epoch": 0.29301778472234413, "grad_norm": 0.07899488728921693, "learning_rate": 0.00019924305098237207, "loss": 0.8638, "step": 16690 }, { "epoch": 0.29319334960234555, "grad_norm": 0.0704216527676942, "learning_rate": 0.00019924167497031088, "loss": 0.8601, "step": 16700 }, { "epoch": 0.293368914482347, "grad_norm": 0.06696349547699972, "learning_rate": 0.00019924029771350884, "loss": 0.8495, "step": 16710 }, { "epoch": 0.29354447936234834, "grad_norm": 0.06861727127727847, "learning_rate": 0.0001992389192119834, "loss": 0.8667, "step": 16720 }, { "epoch": 0.29372004424234976, "grad_norm": 0.09100653536155183, "learning_rate": 0.00019923753946575205, "loss": 0.8613, "step": 16730 }, { "epoch": 0.2938956091223512, "grad_norm": 0.08516117245094779, "learning_rate": 0.00019923615847483226, "loss": 0.8548, "step": 16740 }, { "epoch": 0.29407117400235255, "grad_norm": 0.08357640798814285, "learning_rate": 0.00019923477623924146, "loss": 0.8479, "step": 16750 }, { "epoch": 0.294246738882354, "grad_norm": 0.09654185042046867, "learning_rate": 0.00019923339275899725, "loss": 0.8557, "step": 16760 }, { "epoch": 0.2944223037623554, "grad_norm": 0.06481294399969062, "learning_rate": 0.00019923200803411716, "loss": 0.8598, "step": 16770 }, { "epoch": 0.29459786864235676, "grad_norm": 0.08702100131877259, "learning_rate": 0.00019923062206461868, "loss": 0.859, "step": 16780 }, { "epoch": 0.2947734335223582, "grad_norm": 0.07875094923759217, "learning_rate": 0.00019922923485051936, "loss": 0.8569, "step": 16790 }, { "epoch": 0.2949489984023596, "grad_norm": 0.07803535413144201, "learning_rate": 0.00019922784639183683, "loss": 0.8574, "step": 16800 }, { "epoch": 0.295124563282361, "grad_norm": 0.08663842115177851, "learning_rate": 0.00019922645668858866, "loss": 0.851, "step": 16810 }, { "epoch": 0.2953001281623624, "grad_norm": 0.06155337318504414, "learning_rate": 0.00019922506574079245, "loss": 0.8577, "step": 16820 }, { "epoch": 0.2954756930423638, "grad_norm": 0.07154136568963751, "learning_rate": 0.00019922367354846586, "loss": 0.8507, "step": 16830 }, { "epoch": 0.2956512579223652, "grad_norm": 0.06545651725789856, "learning_rate": 0.00019922228011162648, "loss": 0.8514, "step": 16840 }, { "epoch": 0.2958268228023666, "grad_norm": 0.08859357522122482, "learning_rate": 0.000199220885430292, "loss": 0.8692, "step": 16850 }, { "epoch": 0.29600238768236803, "grad_norm": 0.07397315354605344, "learning_rate": 0.00019921948950448007, "loss": 0.8639, "step": 16860 }, { "epoch": 0.29617795256236945, "grad_norm": 0.06291299047675504, "learning_rate": 0.0001992180923342084, "loss": 0.8586, "step": 16870 }, { "epoch": 0.2963535174423708, "grad_norm": 0.07169404656720314, "learning_rate": 0.00019921669391949467, "loss": 0.8639, "step": 16880 }, { "epoch": 0.29652908232237224, "grad_norm": 0.0744899877435283, "learning_rate": 0.0001992152942603566, "loss": 0.845, "step": 16890 }, { "epoch": 0.29670464720237366, "grad_norm": 0.06687562088109748, "learning_rate": 0.0001992138933568119, "loss": 0.8582, "step": 16900 }, { "epoch": 0.29688021208237503, "grad_norm": 0.07270881120810614, "learning_rate": 0.00019921249120887839, "loss": 0.8524, "step": 16910 }, { "epoch": 0.29705577696237645, "grad_norm": 0.0678215044805766, "learning_rate": 0.00019921108781657376, "loss": 0.8636, "step": 16920 }, { "epoch": 0.29723134184237787, "grad_norm": 0.08610893492573472, "learning_rate": 0.00019920968317991587, "loss": 0.8653, "step": 16930 }, { "epoch": 0.29740690672237924, "grad_norm": 0.06309889571476862, "learning_rate": 0.0001992082772989224, "loss": 0.8644, "step": 16940 }, { "epoch": 0.29758247160238066, "grad_norm": 0.05470421567029037, "learning_rate": 0.0001992068701736113, "loss": 0.8593, "step": 16950 }, { "epoch": 0.2977580364823821, "grad_norm": 0.04860957472320607, "learning_rate": 0.00019920546180400033, "loss": 0.8564, "step": 16960 }, { "epoch": 0.29793360136238345, "grad_norm": 0.0702526392554524, "learning_rate": 0.0001992040521901073, "loss": 0.8558, "step": 16970 }, { "epoch": 0.29810916624238487, "grad_norm": 0.08985493456627004, "learning_rate": 0.00019920264133195013, "loss": 0.8532, "step": 16980 }, { "epoch": 0.2982847311223863, "grad_norm": 0.06304539931811852, "learning_rate": 0.0001992012292295467, "loss": 0.8592, "step": 16990 }, { "epoch": 0.29846029600238766, "grad_norm": 0.09218348123781539, "learning_rate": 0.00019919981588291486, "loss": 0.8501, "step": 17000 }, { "epoch": 0.2986358608823891, "grad_norm": 0.09462626201981794, "learning_rate": 0.0001991984012920725, "loss": 0.8673, "step": 17010 }, { "epoch": 0.2988114257623905, "grad_norm": 0.11136589147732778, "learning_rate": 0.00019919698545703762, "loss": 0.8671, "step": 17020 }, { "epoch": 0.29898699064239187, "grad_norm": 0.07055317378741263, "learning_rate": 0.00019919556837782806, "loss": 0.8658, "step": 17030 }, { "epoch": 0.2991625555223933, "grad_norm": 0.06353913678846304, "learning_rate": 0.0001991941500544619, "loss": 0.8591, "step": 17040 }, { "epoch": 0.2993381204023947, "grad_norm": 0.08783528045065599, "learning_rate": 0.000199192730486957, "loss": 0.8613, "step": 17050 }, { "epoch": 0.29951368528239614, "grad_norm": 0.08438569670921732, "learning_rate": 0.00019919130967533134, "loss": 0.8528, "step": 17060 }, { "epoch": 0.2996892501623975, "grad_norm": 0.049188022654736624, "learning_rate": 0.00019918988761960304, "loss": 0.8613, "step": 17070 }, { "epoch": 0.2998648150423989, "grad_norm": 0.06887747059256775, "learning_rate": 0.00019918846431979005, "loss": 0.8596, "step": 17080 }, { "epoch": 0.30004037992240035, "grad_norm": 0.06696502181644078, "learning_rate": 0.00019918703977591038, "loss": 0.8536, "step": 17090 }, { "epoch": 0.3002159448024017, "grad_norm": 0.07167892019821537, "learning_rate": 0.0001991856139879821, "loss": 0.854, "step": 17100 }, { "epoch": 0.30039150968240313, "grad_norm": 0.07750390849353687, "learning_rate": 0.00019918418695602326, "loss": 0.8554, "step": 17110 }, { "epoch": 0.30056707456240456, "grad_norm": 0.0668620297136799, "learning_rate": 0.00019918275868005195, "loss": 0.8585, "step": 17120 }, { "epoch": 0.3007426394424059, "grad_norm": 0.052513058674325125, "learning_rate": 0.00019918132916008628, "loss": 0.8551, "step": 17130 }, { "epoch": 0.30091820432240735, "grad_norm": 0.08099888815070892, "learning_rate": 0.00019917989839614435, "loss": 0.8733, "step": 17140 }, { "epoch": 0.30109376920240877, "grad_norm": 0.05718829265862999, "learning_rate": 0.00019917846638824426, "loss": 0.8623, "step": 17150 }, { "epoch": 0.30126933408241013, "grad_norm": 0.0812392235842015, "learning_rate": 0.00019917703313640425, "loss": 0.8498, "step": 17160 }, { "epoch": 0.30144489896241156, "grad_norm": 0.08263407315827623, "learning_rate": 0.00019917559864064238, "loss": 0.8556, "step": 17170 }, { "epoch": 0.301620463842413, "grad_norm": 0.07967774944594185, "learning_rate": 0.00019917416290097685, "loss": 0.8657, "step": 17180 }, { "epoch": 0.30179602872241434, "grad_norm": 0.06284616202748283, "learning_rate": 0.00019917272591742587, "loss": 0.8598, "step": 17190 }, { "epoch": 0.30197159360241577, "grad_norm": 0.10478372134480675, "learning_rate": 0.0001991712876900077, "loss": 0.8608, "step": 17200 }, { "epoch": 0.3021471584824172, "grad_norm": 0.06489720637914502, "learning_rate": 0.0001991698482187404, "loss": 0.8619, "step": 17210 }, { "epoch": 0.30232272336241856, "grad_norm": 0.07534906959035347, "learning_rate": 0.00019916840750364235, "loss": 0.8638, "step": 17220 }, { "epoch": 0.30249828824242, "grad_norm": 0.11156549485828265, "learning_rate": 0.00019916696554473176, "loss": 0.849, "step": 17230 }, { "epoch": 0.3026738531224214, "grad_norm": 0.10062595698844387, "learning_rate": 0.0001991655223420269, "loss": 0.8525, "step": 17240 }, { "epoch": 0.3028494180024228, "grad_norm": 0.07823919621851963, "learning_rate": 0.00019916407789554607, "loss": 0.8562, "step": 17250 }, { "epoch": 0.3030249828824242, "grad_norm": 0.08952754681649663, "learning_rate": 0.00019916263220530754, "loss": 0.8575, "step": 17260 }, { "epoch": 0.3032005477624256, "grad_norm": 0.07502104115937305, "learning_rate": 0.00019916118527132966, "loss": 0.8625, "step": 17270 }, { "epoch": 0.30337611264242703, "grad_norm": 0.06532144739713754, "learning_rate": 0.00019915973709363078, "loss": 0.8509, "step": 17280 }, { "epoch": 0.3035516775224284, "grad_norm": 0.06847630610909773, "learning_rate": 0.0001991582876722292, "loss": 0.8577, "step": 17290 }, { "epoch": 0.3037272424024298, "grad_norm": 0.07179995021054224, "learning_rate": 0.00019915683700714328, "loss": 0.8547, "step": 17300 }, { "epoch": 0.30390280728243124, "grad_norm": 0.08723655881858587, "learning_rate": 0.00019915538509839148, "loss": 0.8524, "step": 17310 }, { "epoch": 0.3040783721624326, "grad_norm": 0.07555707490277051, "learning_rate": 0.0001991539319459921, "loss": 0.8668, "step": 17320 }, { "epoch": 0.30425393704243403, "grad_norm": 0.06800552983974828, "learning_rate": 0.00019915247754996357, "loss": 0.8519, "step": 17330 }, { "epoch": 0.30442950192243545, "grad_norm": 0.059510888652118094, "learning_rate": 0.00019915102191032436, "loss": 0.849, "step": 17340 }, { "epoch": 0.3046050668024368, "grad_norm": 0.0658050726810535, "learning_rate": 0.0001991495650270929, "loss": 0.8503, "step": 17350 }, { "epoch": 0.30478063168243824, "grad_norm": 0.1026816509322282, "learning_rate": 0.0001991481069002876, "loss": 0.857, "step": 17360 }, { "epoch": 0.30495619656243966, "grad_norm": 0.059308694420321384, "learning_rate": 0.000199146647529927, "loss": 0.8588, "step": 17370 }, { "epoch": 0.30513176144244103, "grad_norm": 0.09572537693407832, "learning_rate": 0.0001991451869160296, "loss": 0.8623, "step": 17380 }, { "epoch": 0.30530732632244245, "grad_norm": 0.07795378892056264, "learning_rate": 0.00019914372505861386, "loss": 0.8653, "step": 17390 }, { "epoch": 0.3054828912024439, "grad_norm": 0.05674074853667246, "learning_rate": 0.0001991422619576983, "loss": 0.8601, "step": 17400 }, { "epoch": 0.30565845608244524, "grad_norm": 0.07886007423232559, "learning_rate": 0.00019914079761330146, "loss": 0.8608, "step": 17410 }, { "epoch": 0.30583402096244666, "grad_norm": 0.060614888083388524, "learning_rate": 0.0001991393320254419, "loss": 0.8571, "step": 17420 }, { "epoch": 0.3060095858424481, "grad_norm": 0.06769047754781277, "learning_rate": 0.00019913786519413823, "loss": 0.8597, "step": 17430 }, { "epoch": 0.3061851507224495, "grad_norm": 0.10008464608534992, "learning_rate": 0.00019913639711940898, "loss": 0.8618, "step": 17440 }, { "epoch": 0.3063607156024509, "grad_norm": 0.10026201757117098, "learning_rate": 0.00019913492780127278, "loss": 0.8491, "step": 17450 }, { "epoch": 0.3065362804824523, "grad_norm": 0.10362134221346893, "learning_rate": 0.0001991334572397482, "loss": 0.8538, "step": 17460 }, { "epoch": 0.3067118453624537, "grad_norm": 0.05478538140852639, "learning_rate": 0.00019913198543485396, "loss": 0.8608, "step": 17470 }, { "epoch": 0.3068874102424551, "grad_norm": 0.06509852282065093, "learning_rate": 0.00019913051238660865, "loss": 0.86, "step": 17480 }, { "epoch": 0.3070629751224565, "grad_norm": 0.058674758172782095, "learning_rate": 0.00019912903809503094, "loss": 0.8526, "step": 17490 }, { "epoch": 0.3072385400024579, "grad_norm": 0.05536564637052904, "learning_rate": 0.00019912756256013946, "loss": 0.8603, "step": 17500 }, { "epoch": 0.3074141048824593, "grad_norm": 0.07654518144074346, "learning_rate": 0.000199126085781953, "loss": 0.8574, "step": 17510 }, { "epoch": 0.3075896697624607, "grad_norm": 0.06509695118041121, "learning_rate": 0.00019912460776049025, "loss": 0.8568, "step": 17520 }, { "epoch": 0.30776523464246214, "grad_norm": 0.06748571986621985, "learning_rate": 0.0001991231284957699, "loss": 0.8613, "step": 17530 }, { "epoch": 0.3079407995224635, "grad_norm": 0.07202895615321345, "learning_rate": 0.00019912164798781067, "loss": 0.8696, "step": 17540 }, { "epoch": 0.3081163644024649, "grad_norm": 0.08791907986656636, "learning_rate": 0.0001991201662366314, "loss": 0.8609, "step": 17550 }, { "epoch": 0.30829192928246635, "grad_norm": 0.08585888626608079, "learning_rate": 0.00019911868324225078, "loss": 0.8602, "step": 17560 }, { "epoch": 0.3084674941624677, "grad_norm": 0.06459070830729048, "learning_rate": 0.00019911719900468766, "loss": 0.8509, "step": 17570 }, { "epoch": 0.30864305904246914, "grad_norm": 0.06815495059520948, "learning_rate": 0.00019911571352396084, "loss": 0.8577, "step": 17580 }, { "epoch": 0.30881862392247056, "grad_norm": 0.06269367507707141, "learning_rate": 0.0001991142268000891, "loss": 0.854, "step": 17590 }, { "epoch": 0.308994188802472, "grad_norm": 0.07107986896930625, "learning_rate": 0.0001991127388330913, "loss": 0.8622, "step": 17600 }, { "epoch": 0.30916975368247335, "grad_norm": 0.10679766494016368, "learning_rate": 0.00019911124962298632, "loss": 0.8645, "step": 17610 }, { "epoch": 0.30934531856247477, "grad_norm": 0.07042292576948937, "learning_rate": 0.000199109759169793, "loss": 0.8527, "step": 17620 }, { "epoch": 0.3095208834424762, "grad_norm": 0.06506002728383325, "learning_rate": 0.0001991082674735302, "loss": 0.8634, "step": 17630 }, { "epoch": 0.30969644832247756, "grad_norm": 0.08377054110816094, "learning_rate": 0.00019910677453421687, "loss": 0.86, "step": 17640 }, { "epoch": 0.309872013202479, "grad_norm": 0.05981415279865544, "learning_rate": 0.0001991052803518719, "loss": 0.859, "step": 17650 }, { "epoch": 0.3100475780824804, "grad_norm": 0.07425654299348483, "learning_rate": 0.00019910378492651423, "loss": 0.8516, "step": 17660 }, { "epoch": 0.31022314296248177, "grad_norm": 0.08713602233396292, "learning_rate": 0.0001991022882581628, "loss": 0.8534, "step": 17670 }, { "epoch": 0.3103987078424832, "grad_norm": 0.10221955303412454, "learning_rate": 0.0001991007903468366, "loss": 0.8561, "step": 17680 }, { "epoch": 0.3105742727224846, "grad_norm": 0.0696522437510081, "learning_rate": 0.00019909929119255455, "loss": 0.8634, "step": 17690 }, { "epoch": 0.310749837602486, "grad_norm": 0.06316718621017117, "learning_rate": 0.00019909779079533568, "loss": 0.8625, "step": 17700 }, { "epoch": 0.3109254024824874, "grad_norm": 0.06028646350881662, "learning_rate": 0.00019909628915519903, "loss": 0.8566, "step": 17710 }, { "epoch": 0.3111009673624888, "grad_norm": 0.07510277518592742, "learning_rate": 0.0001990947862721636, "loss": 0.8627, "step": 17720 }, { "epoch": 0.3112765322424902, "grad_norm": 0.07040723154333153, "learning_rate": 0.0001990932821462484, "loss": 0.8586, "step": 17730 }, { "epoch": 0.3114520971224916, "grad_norm": 0.07085258611017076, "learning_rate": 0.00019909177677747254, "loss": 0.8504, "step": 17740 }, { "epoch": 0.31162766200249303, "grad_norm": 0.07237573593719533, "learning_rate": 0.000199090270165855, "loss": 0.8566, "step": 17750 }, { "epoch": 0.3118032268824944, "grad_norm": 0.08416570120820809, "learning_rate": 0.00019908876231141504, "loss": 0.8522, "step": 17760 }, { "epoch": 0.3119787917624958, "grad_norm": 0.07602909786548825, "learning_rate": 0.0001990872532141716, "loss": 0.8503, "step": 17770 }, { "epoch": 0.31215435664249724, "grad_norm": 0.11361552170769419, "learning_rate": 0.00019908574287414387, "loss": 0.8622, "step": 17780 }, { "epoch": 0.31232992152249867, "grad_norm": 0.05732009291944238, "learning_rate": 0.00019908423129135101, "loss": 0.8592, "step": 17790 }, { "epoch": 0.31250548640250003, "grad_norm": 0.07676230019903203, "learning_rate": 0.0001990827184658121, "loss": 0.867, "step": 17800 }, { "epoch": 0.31268105128250145, "grad_norm": 0.11433071449743344, "learning_rate": 0.00019908120439754637, "loss": 0.8577, "step": 17810 }, { "epoch": 0.3128566161625029, "grad_norm": 0.07224020343558185, "learning_rate": 0.000199079689086573, "loss": 0.8635, "step": 17820 }, { "epoch": 0.31303218104250424, "grad_norm": 0.06299465391345403, "learning_rate": 0.00019907817253291116, "loss": 0.8575, "step": 17830 }, { "epoch": 0.31320774592250566, "grad_norm": 0.0862599795849425, "learning_rate": 0.00019907665473658005, "loss": 0.8557, "step": 17840 }, { "epoch": 0.3133833108025071, "grad_norm": 0.06093418427124292, "learning_rate": 0.00019907513569759896, "loss": 0.8508, "step": 17850 }, { "epoch": 0.31355887568250845, "grad_norm": 0.05569976847357369, "learning_rate": 0.0001990736154159871, "loss": 0.8612, "step": 17860 }, { "epoch": 0.3137344405625099, "grad_norm": 0.07777719209226125, "learning_rate": 0.00019907209389176373, "loss": 0.8552, "step": 17870 }, { "epoch": 0.3139100054425113, "grad_norm": 0.11126040938286483, "learning_rate": 0.00019907057112494812, "loss": 0.8563, "step": 17880 }, { "epoch": 0.31408557032251266, "grad_norm": 0.07111194458004376, "learning_rate": 0.00019906904711555962, "loss": 0.859, "step": 17890 }, { "epoch": 0.3142611352025141, "grad_norm": 0.06964000774004818, "learning_rate": 0.0001990675218636175, "loss": 0.8602, "step": 17900 }, { "epoch": 0.3144367000825155, "grad_norm": 0.07183807291149802, "learning_rate": 0.00019906599536914104, "loss": 0.855, "step": 17910 }, { "epoch": 0.3146122649625169, "grad_norm": 0.06799537300445023, "learning_rate": 0.00019906446763214963, "loss": 0.8601, "step": 17920 }, { "epoch": 0.3147878298425183, "grad_norm": 0.05767954635122441, "learning_rate": 0.00019906293865266264, "loss": 0.8646, "step": 17930 }, { "epoch": 0.3149633947225197, "grad_norm": 0.06938395262092172, "learning_rate": 0.00019906140843069941, "loss": 0.8513, "step": 17940 }, { "epoch": 0.3151389596025211, "grad_norm": 0.07486366767299105, "learning_rate": 0.00019905987696627935, "loss": 0.859, "step": 17950 }, { "epoch": 0.3153145244825225, "grad_norm": 0.05086325506096764, "learning_rate": 0.00019905834425942184, "loss": 0.8601, "step": 17960 }, { "epoch": 0.31549008936252393, "grad_norm": 0.06417470099364238, "learning_rate": 0.00019905681031014635, "loss": 0.8596, "step": 17970 }, { "epoch": 0.31566565424252535, "grad_norm": 0.04795626461432263, "learning_rate": 0.00019905527511847226, "loss": 0.8547, "step": 17980 }, { "epoch": 0.3158412191225267, "grad_norm": 0.06989275421344479, "learning_rate": 0.00019905373868441906, "loss": 0.8629, "step": 17990 }, { "epoch": 0.31601678400252814, "grad_norm": 0.061028882691268965, "learning_rate": 0.00019905220100800616, "loss": 0.8486, "step": 18000 }, { "epoch": 0.31619234888252956, "grad_norm": 0.06198511050375044, "learning_rate": 0.0001990506620892531, "loss": 0.8643, "step": 18010 }, { "epoch": 0.31636791376253093, "grad_norm": 0.06156197078219974, "learning_rate": 0.00019904912192817938, "loss": 0.86, "step": 18020 }, { "epoch": 0.31654347864253235, "grad_norm": 0.1182654345658973, "learning_rate": 0.00019904758052480446, "loss": 0.856, "step": 18030 }, { "epoch": 0.31671904352253377, "grad_norm": 0.07426350090433857, "learning_rate": 0.00019904603787914793, "loss": 0.8577, "step": 18040 }, { "epoch": 0.31689460840253514, "grad_norm": 0.059168244742340954, "learning_rate": 0.0001990444939912293, "loss": 0.8699, "step": 18050 }, { "epoch": 0.31707017328253656, "grad_norm": 0.07263772935116553, "learning_rate": 0.00019904294886106814, "loss": 0.856, "step": 18060 }, { "epoch": 0.317245738162538, "grad_norm": 0.07141097643541501, "learning_rate": 0.00019904140248868404, "loss": 0.858, "step": 18070 }, { "epoch": 0.31742130304253935, "grad_norm": 0.05894409185367452, "learning_rate": 0.00019903985487409656, "loss": 0.8541, "step": 18080 }, { "epoch": 0.31759686792254077, "grad_norm": 0.05895798345974111, "learning_rate": 0.00019903830601732537, "loss": 0.852, "step": 18090 }, { "epoch": 0.3177724328025422, "grad_norm": 0.08563914289492862, "learning_rate": 0.00019903675591838999, "loss": 0.858, "step": 18100 }, { "epoch": 0.31794799768254356, "grad_norm": 0.07373284071021617, "learning_rate": 0.00019903520457731016, "loss": 0.8574, "step": 18110 }, { "epoch": 0.318123562562545, "grad_norm": 0.058213175727576075, "learning_rate": 0.0001990336519941055, "loss": 0.8627, "step": 18120 }, { "epoch": 0.3182991274425464, "grad_norm": 0.054839151913072176, "learning_rate": 0.00019903209816879566, "loss": 0.8596, "step": 18130 }, { "epoch": 0.31847469232254777, "grad_norm": 0.05646908482736233, "learning_rate": 0.00019903054310140038, "loss": 0.8623, "step": 18140 }, { "epoch": 0.3186502572025492, "grad_norm": 0.06568910156076978, "learning_rate": 0.00019902898679193933, "loss": 0.8627, "step": 18150 }, { "epoch": 0.3188258220825506, "grad_norm": 0.06944476454711082, "learning_rate": 0.00019902742924043224, "loss": 0.8668, "step": 18160 }, { "epoch": 0.31900138696255204, "grad_norm": 0.0827206568014568, "learning_rate": 0.00019902587044689878, "loss": 0.8664, "step": 18170 }, { "epoch": 0.3191769518425534, "grad_norm": 0.0593173034798514, "learning_rate": 0.00019902431041135882, "loss": 0.8599, "step": 18180 }, { "epoch": 0.3193525167225548, "grad_norm": 0.0680994657334782, "learning_rate": 0.00019902274913383204, "loss": 0.8575, "step": 18190 }, { "epoch": 0.31952808160255625, "grad_norm": 0.09619008431631026, "learning_rate": 0.00019902118661433823, "loss": 0.8597, "step": 18200 }, { "epoch": 0.3197036464825576, "grad_norm": 0.07539639264146879, "learning_rate": 0.0001990196228528972, "loss": 0.8551, "step": 18210 }, { "epoch": 0.31987921136255903, "grad_norm": 0.06975892124818792, "learning_rate": 0.00019901805784952877, "loss": 0.8602, "step": 18220 }, { "epoch": 0.32005477624256046, "grad_norm": 0.07999303788266972, "learning_rate": 0.00019901649160425274, "loss": 0.8567, "step": 18230 }, { "epoch": 0.3202303411225618, "grad_norm": 0.06975631504386215, "learning_rate": 0.00019901492411708897, "loss": 0.8554, "step": 18240 }, { "epoch": 0.32040590600256325, "grad_norm": 0.059367621579359084, "learning_rate": 0.0001990133553880574, "loss": 0.8576, "step": 18250 }, { "epoch": 0.32058147088256467, "grad_norm": 0.09477396874749154, "learning_rate": 0.00019901178541717774, "loss": 0.8558, "step": 18260 }, { "epoch": 0.32075703576256603, "grad_norm": 0.05565276169402761, "learning_rate": 0.00019901021420447003, "loss": 0.855, "step": 18270 }, { "epoch": 0.32093260064256746, "grad_norm": 0.07134908873078358, "learning_rate": 0.0001990086417499541, "loss": 0.8544, "step": 18280 }, { "epoch": 0.3211081655225689, "grad_norm": 0.06623312429410394, "learning_rate": 0.00019900706805364991, "loss": 0.853, "step": 18290 }, { "epoch": 0.32128373040257024, "grad_norm": 0.0761714554282892, "learning_rate": 0.00019900549311557738, "loss": 0.8639, "step": 18300 }, { "epoch": 0.32145929528257167, "grad_norm": 0.07523319931073931, "learning_rate": 0.00019900391693575647, "loss": 0.8564, "step": 18310 }, { "epoch": 0.3216348601625731, "grad_norm": 0.08698971223671158, "learning_rate": 0.00019900233951420716, "loss": 0.8529, "step": 18320 }, { "epoch": 0.3218104250425745, "grad_norm": 0.06765231040231456, "learning_rate": 0.00019900076085094942, "loss": 0.8584, "step": 18330 }, { "epoch": 0.3219859899225759, "grad_norm": 0.06027251232694273, "learning_rate": 0.00019899918094600324, "loss": 0.8656, "step": 18340 }, { "epoch": 0.3221615548025773, "grad_norm": 0.08446034534489896, "learning_rate": 0.0001989975997993887, "loss": 0.8601, "step": 18350 }, { "epoch": 0.3223371196825787, "grad_norm": 0.06157426466354918, "learning_rate": 0.00019899601741112574, "loss": 0.8596, "step": 18360 }, { "epoch": 0.3225126845625801, "grad_norm": 0.06931464666964303, "learning_rate": 0.0001989944337812345, "loss": 0.8585, "step": 18370 }, { "epoch": 0.3226882494425815, "grad_norm": 0.058178952835094506, "learning_rate": 0.00019899284890973498, "loss": 0.8492, "step": 18380 }, { "epoch": 0.32286381432258293, "grad_norm": 0.06517554404899108, "learning_rate": 0.00019899126279664727, "loss": 0.856, "step": 18390 }, { "epoch": 0.3230393792025843, "grad_norm": 0.07420983071030114, "learning_rate": 0.0001989896754419915, "loss": 0.8608, "step": 18400 }, { "epoch": 0.3232149440825857, "grad_norm": 0.049508529215607104, "learning_rate": 0.00019898808684578776, "loss": 0.8589, "step": 18410 }, { "epoch": 0.32339050896258714, "grad_norm": 0.05673206488508253, "learning_rate": 0.0001989864970080562, "loss": 0.8678, "step": 18420 }, { "epoch": 0.3235660738425885, "grad_norm": 0.05363425592466746, "learning_rate": 0.0001989849059288169, "loss": 0.8602, "step": 18430 }, { "epoch": 0.32374163872258993, "grad_norm": 0.07498576446654881, "learning_rate": 0.0001989833136080901, "loss": 0.8573, "step": 18440 }, { "epoch": 0.32391720360259135, "grad_norm": 0.07388062209430764, "learning_rate": 0.0001989817200458959, "loss": 0.8577, "step": 18450 }, { "epoch": 0.3240927684825927, "grad_norm": 0.052702807122935805, "learning_rate": 0.0001989801252422546, "loss": 0.8598, "step": 18460 }, { "epoch": 0.32426833336259414, "grad_norm": 0.06101344716131673, "learning_rate": 0.00019897852919718628, "loss": 0.8519, "step": 18470 }, { "epoch": 0.32444389824259556, "grad_norm": 0.08190698317015752, "learning_rate": 0.0001989769319107112, "loss": 0.8559, "step": 18480 }, { "epoch": 0.32461946312259693, "grad_norm": 0.06471139644855006, "learning_rate": 0.00019897533338284967, "loss": 0.8524, "step": 18490 }, { "epoch": 0.32479502800259835, "grad_norm": 0.06993863208512949, "learning_rate": 0.00019897373361362186, "loss": 0.8556, "step": 18500 }, { "epoch": 0.3249705928825998, "grad_norm": 0.05685831199441382, "learning_rate": 0.00019897213260304807, "loss": 0.8563, "step": 18510 }, { "epoch": 0.3251461577626012, "grad_norm": 0.082169275270938, "learning_rate": 0.00019897053035114858, "loss": 0.8509, "step": 18520 }, { "epoch": 0.32532172264260256, "grad_norm": 0.06795800317773747, "learning_rate": 0.00019896892685794372, "loss": 0.8611, "step": 18530 }, { "epoch": 0.325497287522604, "grad_norm": 0.06533294230706752, "learning_rate": 0.00019896732212345373, "loss": 0.8603, "step": 18540 }, { "epoch": 0.3256728524026054, "grad_norm": 0.0663909903654868, "learning_rate": 0.00019896571614769903, "loss": 0.8545, "step": 18550 }, { "epoch": 0.3258484172826068, "grad_norm": 0.058432993993101257, "learning_rate": 0.00019896410893069995, "loss": 0.8624, "step": 18560 }, { "epoch": 0.3260239821626082, "grad_norm": 0.057338400120719515, "learning_rate": 0.00019896250047247679, "loss": 0.8596, "step": 18570 }, { "epoch": 0.3261995470426096, "grad_norm": 0.0691883331033922, "learning_rate": 0.00019896089077305, "loss": 0.8537, "step": 18580 }, { "epoch": 0.326375111922611, "grad_norm": 0.1076136534823399, "learning_rate": 0.00019895927983243993, "loss": 0.8613, "step": 18590 }, { "epoch": 0.3265506768026124, "grad_norm": 0.06863770809012484, "learning_rate": 0.00019895766765066702, "loss": 0.8539, "step": 18600 }, { "epoch": 0.3267262416826138, "grad_norm": 0.09068751545991117, "learning_rate": 0.00019895605422775168, "loss": 0.8557, "step": 18610 }, { "epoch": 0.3269018065626152, "grad_norm": 0.06686752968304399, "learning_rate": 0.00019895443956371439, "loss": 0.8661, "step": 18620 }, { "epoch": 0.3270773714426166, "grad_norm": 0.055359035040326764, "learning_rate": 0.00019895282365857558, "loss": 0.8527, "step": 18630 }, { "epoch": 0.32725293632261804, "grad_norm": 0.0625405413744945, "learning_rate": 0.0001989512065123557, "loss": 0.861, "step": 18640 }, { "epoch": 0.3274285012026194, "grad_norm": 0.06414350771746707, "learning_rate": 0.00019894958812507525, "loss": 0.8556, "step": 18650 }, { "epoch": 0.3276040660826208, "grad_norm": 0.08495493156166287, "learning_rate": 0.00019894796849675476, "loss": 0.8586, "step": 18660 }, { "epoch": 0.32777963096262225, "grad_norm": 0.08596165564328906, "learning_rate": 0.00019894634762741473, "loss": 0.8568, "step": 18670 }, { "epoch": 0.3279551958426236, "grad_norm": 0.055024184339675646, "learning_rate": 0.0001989447255170757, "loss": 0.8529, "step": 18680 }, { "epoch": 0.32813076072262504, "grad_norm": 0.07209922716629169, "learning_rate": 0.00019894310216575826, "loss": 0.8538, "step": 18690 }, { "epoch": 0.32830632560262646, "grad_norm": 0.10023394572185754, "learning_rate": 0.00019894147757348293, "loss": 0.8593, "step": 18700 }, { "epoch": 0.3284818904826279, "grad_norm": 0.07267567278231189, "learning_rate": 0.00019893985174027027, "loss": 0.8468, "step": 18710 }, { "epoch": 0.32865745536262925, "grad_norm": 0.0738679250746881, "learning_rate": 0.00019893822466614098, "loss": 0.8634, "step": 18720 }, { "epoch": 0.32883302024263067, "grad_norm": 0.07264093967782095, "learning_rate": 0.00019893659635111558, "loss": 0.851, "step": 18730 }, { "epoch": 0.3290085851226321, "grad_norm": 0.0580287917377172, "learning_rate": 0.00019893496679521472, "loss": 0.8499, "step": 18740 }, { "epoch": 0.32918415000263346, "grad_norm": 0.09794938207603877, "learning_rate": 0.0001989333359984591, "loss": 0.8554, "step": 18750 }, { "epoch": 0.3293597148826349, "grad_norm": 0.06515141338612652, "learning_rate": 0.00019893170396086934, "loss": 0.8617, "step": 18760 }, { "epoch": 0.3295352797626363, "grad_norm": 0.0657968709223574, "learning_rate": 0.0001989300706824661, "loss": 0.8533, "step": 18770 }, { "epoch": 0.32971084464263767, "grad_norm": 0.06306917805254626, "learning_rate": 0.00019892843616327007, "loss": 0.8617, "step": 18780 }, { "epoch": 0.3298864095226391, "grad_norm": 0.06915814028661635, "learning_rate": 0.000198926800403302, "loss": 0.8633, "step": 18790 }, { "epoch": 0.3300619744026405, "grad_norm": 0.059676357862892604, "learning_rate": 0.00019892516340258264, "loss": 0.8546, "step": 18800 }, { "epoch": 0.3302375392826419, "grad_norm": 0.07677420656760534, "learning_rate": 0.00019892352516113267, "loss": 0.8458, "step": 18810 }, { "epoch": 0.3304131041626433, "grad_norm": 0.07882154446475968, "learning_rate": 0.00019892188567897289, "loss": 0.8473, "step": 18820 }, { "epoch": 0.3305886690426447, "grad_norm": 0.07975205553621646, "learning_rate": 0.00019892024495612401, "loss": 0.8604, "step": 18830 }, { "epoch": 0.3307642339226461, "grad_norm": 0.06523200004511905, "learning_rate": 0.0001989186029926069, "loss": 0.8581, "step": 18840 }, { "epoch": 0.3309397988026475, "grad_norm": 0.0614540754826004, "learning_rate": 0.00019891695978844232, "loss": 0.8632, "step": 18850 }, { "epoch": 0.33111536368264893, "grad_norm": 0.05899052228208036, "learning_rate": 0.00019891531534365107, "loss": 0.8599, "step": 18860 }, { "epoch": 0.3312909285626503, "grad_norm": 0.07394025237206421, "learning_rate": 0.00019891366965825403, "loss": 0.8672, "step": 18870 }, { "epoch": 0.3314664934426517, "grad_norm": 0.07548975259731722, "learning_rate": 0.000198912022732272, "loss": 0.8623, "step": 18880 }, { "epoch": 0.33164205832265314, "grad_norm": 0.06802945886093213, "learning_rate": 0.0001989103745657259, "loss": 0.8553, "step": 18890 }, { "epoch": 0.33181762320265457, "grad_norm": 0.08529910965996217, "learning_rate": 0.00019890872515863662, "loss": 0.8533, "step": 18900 }, { "epoch": 0.33199318808265593, "grad_norm": 0.07900977298707702, "learning_rate": 0.00019890707451102497, "loss": 0.8672, "step": 18910 }, { "epoch": 0.33216875296265735, "grad_norm": 0.09257008185764286, "learning_rate": 0.00019890542262291193, "loss": 0.86, "step": 18920 }, { "epoch": 0.3323443178426588, "grad_norm": 0.06244187074627285, "learning_rate": 0.00019890376949431845, "loss": 0.8652, "step": 18930 }, { "epoch": 0.33251988272266014, "grad_norm": 0.05624041558005601, "learning_rate": 0.00019890211512526545, "loss": 0.8615, "step": 18940 }, { "epoch": 0.33269544760266156, "grad_norm": 0.0617480131881405, "learning_rate": 0.00019890045951577386, "loss": 0.8482, "step": 18950 }, { "epoch": 0.332871012482663, "grad_norm": 0.06337386002724951, "learning_rate": 0.00019889880266586472, "loss": 0.8603, "step": 18960 }, { "epoch": 0.33304657736266435, "grad_norm": 0.07428524293212364, "learning_rate": 0.00019889714457555898, "loss": 0.866, "step": 18970 }, { "epoch": 0.3332221422426658, "grad_norm": 0.06460208542113219, "learning_rate": 0.00019889548524487763, "loss": 0.8565, "step": 18980 }, { "epoch": 0.3333977071226672, "grad_norm": 0.07133436468416435, "learning_rate": 0.00019889382467384173, "loss": 0.8584, "step": 18990 }, { "epoch": 0.33357327200266856, "grad_norm": 0.049696927161112826, "learning_rate": 0.0001988921628624723, "loss": 0.8702, "step": 19000 }, { "epoch": 0.33374883688267, "grad_norm": 0.06120569653143817, "learning_rate": 0.00019889049981079043, "loss": 0.8523, "step": 19010 }, { "epoch": 0.3339244017626714, "grad_norm": 0.06875605222135577, "learning_rate": 0.00019888883551881718, "loss": 0.8634, "step": 19020 }, { "epoch": 0.3340999666426728, "grad_norm": 0.07120057991833055, "learning_rate": 0.00019888716998657359, "loss": 0.8621, "step": 19030 }, { "epoch": 0.3342755315226742, "grad_norm": 0.07578951118508193, "learning_rate": 0.00019888550321408083, "loss": 0.8508, "step": 19040 }, { "epoch": 0.3344510964026756, "grad_norm": 0.09799541178275413, "learning_rate": 0.0001988838352013599, "loss": 0.8547, "step": 19050 }, { "epoch": 0.33462666128267704, "grad_norm": 0.06273564983196227, "learning_rate": 0.0001988821659484321, "loss": 0.8587, "step": 19060 }, { "epoch": 0.3348022261626784, "grad_norm": 0.07694243429423718, "learning_rate": 0.0001988804954553185, "loss": 0.8525, "step": 19070 }, { "epoch": 0.33497779104267983, "grad_norm": 0.062721001798842, "learning_rate": 0.00019887882372204022, "loss": 0.8516, "step": 19080 }, { "epoch": 0.33515335592268125, "grad_norm": 0.0818155119618455, "learning_rate": 0.00019887715074861853, "loss": 0.8588, "step": 19090 }, { "epoch": 0.3353289208026826, "grad_norm": 0.06557288465529827, "learning_rate": 0.00019887547653507455, "loss": 0.859, "step": 19100 }, { "epoch": 0.33550448568268404, "grad_norm": 0.06265468053727054, "learning_rate": 0.00019887380108142954, "loss": 0.8532, "step": 19110 }, { "epoch": 0.33568005056268546, "grad_norm": 0.08459570665241795, "learning_rate": 0.0001988721243877047, "loss": 0.8565, "step": 19120 }, { "epoch": 0.33585561544268683, "grad_norm": 0.07117709860027395, "learning_rate": 0.0001988704464539213, "loss": 0.8646, "step": 19130 }, { "epoch": 0.33603118032268825, "grad_norm": 0.06918889042790631, "learning_rate": 0.0001988687672801006, "loss": 0.8543, "step": 19140 }, { "epoch": 0.33620674520268967, "grad_norm": 0.05641660473064654, "learning_rate": 0.00019886708686626386, "loss": 0.8648, "step": 19150 }, { "epoch": 0.33638231008269104, "grad_norm": 0.06356431171466186, "learning_rate": 0.00019886540521243232, "loss": 0.8561, "step": 19160 }, { "epoch": 0.33655787496269246, "grad_norm": 0.06176508724472949, "learning_rate": 0.0001988637223186274, "loss": 0.854, "step": 19170 }, { "epoch": 0.3367334398426939, "grad_norm": 0.07941477904117106, "learning_rate": 0.0001988620381848703, "loss": 0.866, "step": 19180 }, { "epoch": 0.33690900472269525, "grad_norm": 0.097539352141863, "learning_rate": 0.00019886035281118248, "loss": 0.8571, "step": 19190 }, { "epoch": 0.33708456960269667, "grad_norm": 0.07822545800772135, "learning_rate": 0.00019885866619758522, "loss": 0.858, "step": 19200 }, { "epoch": 0.3372601344826981, "grad_norm": 0.07492605204460843, "learning_rate": 0.00019885697834409987, "loss": 0.8634, "step": 19210 }, { "epoch": 0.33743569936269946, "grad_norm": 0.0725855075048947, "learning_rate": 0.0001988552892507479, "loss": 0.8578, "step": 19220 }, { "epoch": 0.3376112642427009, "grad_norm": 0.06839137658634518, "learning_rate": 0.00019885359891755065, "loss": 0.8541, "step": 19230 }, { "epoch": 0.3377868291227023, "grad_norm": 0.08010319760528196, "learning_rate": 0.00019885190734452953, "loss": 0.8597, "step": 19240 }, { "epoch": 0.3379623940027037, "grad_norm": 0.1042104552223733, "learning_rate": 0.000198850214531706, "loss": 0.8602, "step": 19250 }, { "epoch": 0.3381379588827051, "grad_norm": 0.09303863814319994, "learning_rate": 0.0001988485204791015, "loss": 0.8599, "step": 19260 }, { "epoch": 0.3383135237627065, "grad_norm": 0.058872347367429065, "learning_rate": 0.00019884682518673752, "loss": 0.8597, "step": 19270 }, { "epoch": 0.33848908864270794, "grad_norm": 0.07542841309523092, "learning_rate": 0.00019884512865463546, "loss": 0.85, "step": 19280 }, { "epoch": 0.3386646535227093, "grad_norm": 0.07476841687870499, "learning_rate": 0.00019884343088281693, "loss": 0.8629, "step": 19290 }, { "epoch": 0.3388402184027107, "grad_norm": 0.07080523910555533, "learning_rate": 0.00019884173187130334, "loss": 0.8579, "step": 19300 }, { "epoch": 0.33901578328271215, "grad_norm": 0.06323463238820282, "learning_rate": 0.00019884003162011623, "loss": 0.8608, "step": 19310 }, { "epoch": 0.3391913481627135, "grad_norm": 0.06578883049271064, "learning_rate": 0.00019883833012927722, "loss": 0.8576, "step": 19320 }, { "epoch": 0.33936691304271493, "grad_norm": 0.0762327459427793, "learning_rate": 0.00019883662739880777, "loss": 0.8589, "step": 19330 }, { "epoch": 0.33954247792271636, "grad_norm": 0.0639584680801323, "learning_rate": 0.0001988349234287295, "loss": 0.862, "step": 19340 }, { "epoch": 0.3397180428027177, "grad_norm": 0.06860161690537506, "learning_rate": 0.00019883321821906406, "loss": 0.8633, "step": 19350 }, { "epoch": 0.33989360768271915, "grad_norm": 0.061219235355922934, "learning_rate": 0.00019883151176983295, "loss": 0.8637, "step": 19360 }, { "epoch": 0.34006917256272057, "grad_norm": 0.07219105485277513, "learning_rate": 0.00019882980408105784, "loss": 0.8638, "step": 19370 }, { "epoch": 0.34024473744272193, "grad_norm": 0.068214182492769, "learning_rate": 0.0001988280951527604, "loss": 0.8619, "step": 19380 }, { "epoch": 0.34042030232272336, "grad_norm": 0.06545387018373347, "learning_rate": 0.0001988263849849622, "loss": 0.8586, "step": 19390 }, { "epoch": 0.3405958672027248, "grad_norm": 0.07469779262237132, "learning_rate": 0.00019882467357768495, "loss": 0.8505, "step": 19400 }, { "epoch": 0.34077143208272614, "grad_norm": 0.06814175260656055, "learning_rate": 0.00019882296093095037, "loss": 0.8543, "step": 19410 }, { "epoch": 0.34094699696272757, "grad_norm": 0.06865376790400311, "learning_rate": 0.0001988212470447801, "loss": 0.8587, "step": 19420 }, { "epoch": 0.341122561842729, "grad_norm": 0.06299276954007664, "learning_rate": 0.00019881953191919593, "loss": 0.8587, "step": 19430 }, { "epoch": 0.3412981267227304, "grad_norm": 0.07484274614655176, "learning_rate": 0.00019881781555421949, "loss": 0.8614, "step": 19440 }, { "epoch": 0.3414736916027318, "grad_norm": 0.06984967965563989, "learning_rate": 0.00019881609794987262, "loss": 0.8613, "step": 19450 }, { "epoch": 0.3416492564827332, "grad_norm": 0.0780934288468703, "learning_rate": 0.00019881437910617702, "loss": 0.8531, "step": 19460 }, { "epoch": 0.3418248213627346, "grad_norm": 0.06033108308601824, "learning_rate": 0.00019881265902315447, "loss": 0.8571, "step": 19470 }, { "epoch": 0.342000386242736, "grad_norm": 0.055020852395432036, "learning_rate": 0.00019881093770082685, "loss": 0.8545, "step": 19480 }, { "epoch": 0.3421759511227374, "grad_norm": 0.06502461248464224, "learning_rate": 0.0001988092151392158, "loss": 0.8554, "step": 19490 }, { "epoch": 0.34235151600273883, "grad_norm": 0.08431985430216779, "learning_rate": 0.00019880749133834337, "loss": 0.861, "step": 19500 }, { "epoch": 0.3425270808827402, "grad_norm": 0.07290201849200763, "learning_rate": 0.00019880576629823117, "loss": 0.8596, "step": 19510 }, { "epoch": 0.3427026457627416, "grad_norm": 0.06599842265496005, "learning_rate": 0.00019880404001890124, "loss": 0.8536, "step": 19520 }, { "epoch": 0.34287821064274304, "grad_norm": 0.05105415106065, "learning_rate": 0.00019880231250037536, "loss": 0.8627, "step": 19530 }, { "epoch": 0.3430537755227444, "grad_norm": 0.07552749113078049, "learning_rate": 0.00019880058374267546, "loss": 0.8594, "step": 19540 }, { "epoch": 0.34322934040274583, "grad_norm": 0.07541343507894413, "learning_rate": 0.00019879885374582336, "loss": 0.8473, "step": 19550 }, { "epoch": 0.34340490528274725, "grad_norm": 0.08796876798333103, "learning_rate": 0.0001987971225098411, "loss": 0.8567, "step": 19560 }, { "epoch": 0.3435804701627486, "grad_norm": 0.05992163733142649, "learning_rate": 0.00019879539003475052, "loss": 0.8606, "step": 19570 }, { "epoch": 0.34375603504275004, "grad_norm": 0.0779392219255425, "learning_rate": 0.00019879365632057363, "loss": 0.8497, "step": 19580 }, { "epoch": 0.34393159992275146, "grad_norm": 0.07657760176427332, "learning_rate": 0.00019879192136733234, "loss": 0.8592, "step": 19590 }, { "epoch": 0.34410716480275283, "grad_norm": 0.09636905389619554, "learning_rate": 0.00019879018517504865, "loss": 0.8472, "step": 19600 }, { "epoch": 0.34428272968275425, "grad_norm": 0.09385584026768841, "learning_rate": 0.00019878844774374462, "loss": 0.8585, "step": 19610 }, { "epoch": 0.3444582945627557, "grad_norm": 0.06769690963694532, "learning_rate": 0.00019878670907344222, "loss": 0.8627, "step": 19620 }, { "epoch": 0.3446338594427571, "grad_norm": 0.0539661532233032, "learning_rate": 0.00019878496916416345, "loss": 0.8626, "step": 19630 }, { "epoch": 0.34480942432275846, "grad_norm": 0.05589894544911685, "learning_rate": 0.0001987832280159304, "loss": 0.8616, "step": 19640 }, { "epoch": 0.3449849892027599, "grad_norm": 0.0662786838428017, "learning_rate": 0.00019878148562876507, "loss": 0.8509, "step": 19650 }, { "epoch": 0.3451605540827613, "grad_norm": 0.06874158970078188, "learning_rate": 0.00019877974200268963, "loss": 0.851, "step": 19660 }, { "epoch": 0.3453361189627627, "grad_norm": 0.12275611337537763, "learning_rate": 0.0001987779971377261, "loss": 0.8567, "step": 19670 }, { "epoch": 0.3455116838427641, "grad_norm": 0.06640712973763466, "learning_rate": 0.0001987762510338966, "loss": 0.8545, "step": 19680 }, { "epoch": 0.3456872487227655, "grad_norm": 0.0664307905848072, "learning_rate": 0.00019877450369122326, "loss": 0.8594, "step": 19690 }, { "epoch": 0.3458628136027669, "grad_norm": 0.061385686251929666, "learning_rate": 0.00019877275510972822, "loss": 0.8556, "step": 19700 }, { "epoch": 0.3460383784827683, "grad_norm": 0.07348603857578694, "learning_rate": 0.00019877100528943362, "loss": 0.8589, "step": 19710 }, { "epoch": 0.3462139433627697, "grad_norm": 0.08435252076769903, "learning_rate": 0.00019876925423036168, "loss": 0.8619, "step": 19720 }, { "epoch": 0.3463895082427711, "grad_norm": 0.05701628579623089, "learning_rate": 0.00019876750193253452, "loss": 0.8617, "step": 19730 }, { "epoch": 0.3465650731227725, "grad_norm": 0.053009458911936376, "learning_rate": 0.0001987657483959744, "loss": 0.8629, "step": 19740 }, { "epoch": 0.34674063800277394, "grad_norm": 0.06494154887299836, "learning_rate": 0.00019876399362070348, "loss": 0.8567, "step": 19750 }, { "epoch": 0.3469162028827753, "grad_norm": 0.06252552744175231, "learning_rate": 0.00019876223760674407, "loss": 0.8585, "step": 19760 }, { "epoch": 0.3470917677627767, "grad_norm": 0.06468028372497749, "learning_rate": 0.00019876048035411833, "loss": 0.8595, "step": 19770 }, { "epoch": 0.34726733264277815, "grad_norm": 0.07421886930242856, "learning_rate": 0.00019875872186284857, "loss": 0.8552, "step": 19780 }, { "epoch": 0.34744289752277957, "grad_norm": 0.07157750975157508, "learning_rate": 0.0001987569621329571, "loss": 0.8604, "step": 19790 }, { "epoch": 0.34761846240278094, "grad_norm": 0.07600286289357912, "learning_rate": 0.00019875520116446616, "loss": 0.8664, "step": 19800 }, { "epoch": 0.34779402728278236, "grad_norm": 0.0925284187542818, "learning_rate": 0.0001987534389573981, "loss": 0.8605, "step": 19810 }, { "epoch": 0.3479695921627838, "grad_norm": 0.07120781055885743, "learning_rate": 0.00019875167551177523, "loss": 0.8613, "step": 19820 }, { "epoch": 0.34814515704278515, "grad_norm": 0.07128265897297884, "learning_rate": 0.0001987499108276199, "loss": 0.86, "step": 19830 }, { "epoch": 0.34832072192278657, "grad_norm": 0.07530020697530185, "learning_rate": 0.00019874814490495447, "loss": 0.8601, "step": 19840 }, { "epoch": 0.348496286802788, "grad_norm": 0.09638692670439293, "learning_rate": 0.0001987463777438013, "loss": 0.8589, "step": 19850 }, { "epoch": 0.34867185168278936, "grad_norm": 0.07599327635980532, "learning_rate": 0.0001987446093441828, "loss": 0.8557, "step": 19860 }, { "epoch": 0.3488474165627908, "grad_norm": 0.0642043758751764, "learning_rate": 0.00019874283970612136, "loss": 0.8537, "step": 19870 }, { "epoch": 0.3490229814427922, "grad_norm": 0.0991161460276962, "learning_rate": 0.00019874106882963942, "loss": 0.8599, "step": 19880 }, { "epoch": 0.34919854632279357, "grad_norm": 0.07033014614232033, "learning_rate": 0.00019873929671475937, "loss": 0.8641, "step": 19890 }, { "epoch": 0.349374111202795, "grad_norm": 0.05675025439440612, "learning_rate": 0.00019873752336150373, "loss": 0.8585, "step": 19900 }, { "epoch": 0.3495496760827964, "grad_norm": 0.05574624689821413, "learning_rate": 0.00019873574876989492, "loss": 0.8567, "step": 19910 }, { "epoch": 0.3497252409627978, "grad_norm": 0.06063040586012082, "learning_rate": 0.00019873397293995544, "loss": 0.8499, "step": 19920 }, { "epoch": 0.3499008058427992, "grad_norm": 0.06134876430093596, "learning_rate": 0.00019873219587170782, "loss": 0.8604, "step": 19930 }, { "epoch": 0.3500763707228006, "grad_norm": 0.06534915883068793, "learning_rate": 0.0001987304175651745, "loss": 0.856, "step": 19940 }, { "epoch": 0.350251935602802, "grad_norm": 0.07218691768826466, "learning_rate": 0.00019872863802037813, "loss": 0.875, "step": 19950 }, { "epoch": 0.3504275004828034, "grad_norm": 0.05975619007980095, "learning_rate": 0.00019872685723734112, "loss": 0.8587, "step": 19960 }, { "epoch": 0.35060306536280483, "grad_norm": 0.08557476009712692, "learning_rate": 0.00019872507521608615, "loss": 0.8574, "step": 19970 }, { "epoch": 0.35077863024280626, "grad_norm": 0.06472602172752759, "learning_rate": 0.00019872329195663572, "loss": 0.8583, "step": 19980 }, { "epoch": 0.3509541951228076, "grad_norm": 0.05756598727329916, "learning_rate": 0.00019872150745901246, "loss": 0.8569, "step": 19990 }, { "epoch": 0.35112976000280904, "grad_norm": 0.07935423533285582, "learning_rate": 0.00019871972172323899, "loss": 0.8586, "step": 20000 }, { "epoch": 0.35130532488281047, "grad_norm": 0.06027886554525522, "learning_rate": 0.0001987179347493379, "loss": 0.8562, "step": 20010 }, { "epoch": 0.35148088976281183, "grad_norm": 0.061797041752572425, "learning_rate": 0.00019871614653733184, "loss": 0.857, "step": 20020 }, { "epoch": 0.35165645464281325, "grad_norm": 0.07176249771241038, "learning_rate": 0.00019871435708724346, "loss": 0.8571, "step": 20030 }, { "epoch": 0.3518320195228147, "grad_norm": 0.062783318398205, "learning_rate": 0.0001987125663990955, "loss": 0.8605, "step": 20040 }, { "epoch": 0.35200758440281604, "grad_norm": 0.05826237709703035, "learning_rate": 0.00019871077447291055, "loss": 0.8565, "step": 20050 }, { "epoch": 0.35218314928281746, "grad_norm": 0.0538422186561053, "learning_rate": 0.0001987089813087114, "loss": 0.8605, "step": 20060 }, { "epoch": 0.3523587141628189, "grad_norm": 0.06453328614478401, "learning_rate": 0.0001987071869065207, "loss": 0.8624, "step": 20070 }, { "epoch": 0.35253427904282025, "grad_norm": 0.0769593029665899, "learning_rate": 0.00019870539126636122, "loss": 0.8562, "step": 20080 }, { "epoch": 0.3527098439228217, "grad_norm": 0.07390313686004522, "learning_rate": 0.0001987035943882557, "loss": 0.859, "step": 20090 }, { "epoch": 0.3528854088028231, "grad_norm": 0.07484257631919211, "learning_rate": 0.00019870179627222697, "loss": 0.8549, "step": 20100 }, { "epoch": 0.35306097368282446, "grad_norm": 0.05233942075967626, "learning_rate": 0.0001986999969182977, "loss": 0.8504, "step": 20110 }, { "epoch": 0.3532365385628259, "grad_norm": 0.06873565051060167, "learning_rate": 0.0001986981963264908, "loss": 0.8566, "step": 20120 }, { "epoch": 0.3534121034428273, "grad_norm": 0.05326195061645983, "learning_rate": 0.00019869639449682897, "loss": 0.8592, "step": 20130 }, { "epoch": 0.3535876683228287, "grad_norm": 0.06556523802328988, "learning_rate": 0.0001986945914293351, "loss": 0.8624, "step": 20140 }, { "epoch": 0.3537632332028301, "grad_norm": 0.07242523536634146, "learning_rate": 0.00019869278712403207, "loss": 0.8599, "step": 20150 }, { "epoch": 0.3539387980828315, "grad_norm": 0.07754953118306936, "learning_rate": 0.0001986909815809427, "loss": 0.8593, "step": 20160 }, { "epoch": 0.35411436296283294, "grad_norm": 0.05789654239359886, "learning_rate": 0.00019868917480008984, "loss": 0.8579, "step": 20170 }, { "epoch": 0.3542899278428343, "grad_norm": 0.058689837448439815, "learning_rate": 0.00019868736678149644, "loss": 0.8601, "step": 20180 }, { "epoch": 0.35446549272283573, "grad_norm": 0.0681252476981887, "learning_rate": 0.00019868555752518537, "loss": 0.8584, "step": 20190 }, { "epoch": 0.35464105760283715, "grad_norm": 0.07910975863758687, "learning_rate": 0.00019868374703117957, "loss": 0.861, "step": 20200 }, { "epoch": 0.3548166224828385, "grad_norm": 0.06361999003084971, "learning_rate": 0.000198681935299502, "loss": 0.8548, "step": 20210 }, { "epoch": 0.35499218736283994, "grad_norm": 0.06954391710823767, "learning_rate": 0.00019868012233017557, "loss": 0.858, "step": 20220 }, { "epoch": 0.35516775224284136, "grad_norm": 0.053769468743871444, "learning_rate": 0.00019867830812322324, "loss": 0.8611, "step": 20230 }, { "epoch": 0.35534331712284273, "grad_norm": 0.051781819891275706, "learning_rate": 0.00019867649267866805, "loss": 0.862, "step": 20240 }, { "epoch": 0.35551888200284415, "grad_norm": 0.0836105578843198, "learning_rate": 0.00019867467599653298, "loss": 0.8543, "step": 20250 }, { "epoch": 0.35569444688284557, "grad_norm": 0.08230228515551685, "learning_rate": 0.00019867285807684105, "loss": 0.8584, "step": 20260 }, { "epoch": 0.35587001176284694, "grad_norm": 0.07077824414648133, "learning_rate": 0.00019867103891961526, "loss": 0.8557, "step": 20270 }, { "epoch": 0.35604557664284836, "grad_norm": 0.06628259096472172, "learning_rate": 0.0001986692185248787, "loss": 0.8557, "step": 20280 }, { "epoch": 0.3562211415228498, "grad_norm": 0.07641370070077073, "learning_rate": 0.00019866739689265443, "loss": 0.8589, "step": 20290 }, { "epoch": 0.35639670640285115, "grad_norm": 0.09757237773052772, "learning_rate": 0.00019866557402296552, "loss": 0.8627, "step": 20300 }, { "epoch": 0.35657227128285257, "grad_norm": 0.07236062508102792, "learning_rate": 0.0001986637499158351, "loss": 0.862, "step": 20310 }, { "epoch": 0.356747836162854, "grad_norm": 0.05841148482985717, "learning_rate": 0.00019866192457128622, "loss": 0.8531, "step": 20320 }, { "epoch": 0.3569234010428554, "grad_norm": 0.07915689839779634, "learning_rate": 0.00019866009798934206, "loss": 0.8466, "step": 20330 }, { "epoch": 0.3570989659228568, "grad_norm": 0.06630181529700638, "learning_rate": 0.00019865827017002572, "loss": 0.8525, "step": 20340 }, { "epoch": 0.3572745308028582, "grad_norm": 0.062441598034236906, "learning_rate": 0.00019865644111336037, "loss": 0.8597, "step": 20350 }, { "epoch": 0.3574500956828596, "grad_norm": 0.09197148012406059, "learning_rate": 0.00019865461081936922, "loss": 0.8517, "step": 20360 }, { "epoch": 0.357625660562861, "grad_norm": 0.07851641792021084, "learning_rate": 0.00019865277928807543, "loss": 0.8653, "step": 20370 }, { "epoch": 0.3578012254428624, "grad_norm": 0.07460828633647006, "learning_rate": 0.0001986509465195022, "loss": 0.8531, "step": 20380 }, { "epoch": 0.35797679032286384, "grad_norm": 0.06621448048041624, "learning_rate": 0.0001986491125136728, "loss": 0.8648, "step": 20390 }, { "epoch": 0.3581523552028652, "grad_norm": 0.057390429353180096, "learning_rate": 0.0001986472772706104, "loss": 0.8598, "step": 20400 }, { "epoch": 0.3583279200828666, "grad_norm": 0.06221512188821132, "learning_rate": 0.00019864544079033826, "loss": 0.8513, "step": 20410 }, { "epoch": 0.35850348496286805, "grad_norm": 0.052469336455054895, "learning_rate": 0.00019864360307287972, "loss": 0.8581, "step": 20420 }, { "epoch": 0.3586790498428694, "grad_norm": 0.0504191238558091, "learning_rate": 0.00019864176411825798, "loss": 0.8617, "step": 20430 }, { "epoch": 0.35885461472287083, "grad_norm": 0.06414887709086453, "learning_rate": 0.0001986399239264964, "loss": 0.8458, "step": 20440 }, { "epoch": 0.35903017960287226, "grad_norm": 0.0685322691889545, "learning_rate": 0.00019863808249761825, "loss": 0.8521, "step": 20450 }, { "epoch": 0.3592057444828736, "grad_norm": 0.07533151216400781, "learning_rate": 0.00019863623983164692, "loss": 0.8597, "step": 20460 }, { "epoch": 0.35938130936287505, "grad_norm": 0.06722008188016607, "learning_rate": 0.00019863439592860565, "loss": 0.8575, "step": 20470 }, { "epoch": 0.35955687424287647, "grad_norm": 0.05414428543190899, "learning_rate": 0.00019863255078851795, "loss": 0.8582, "step": 20480 }, { "epoch": 0.35973243912287783, "grad_norm": 0.07133769921784942, "learning_rate": 0.00019863070441140707, "loss": 0.8614, "step": 20490 }, { "epoch": 0.35990800400287926, "grad_norm": 0.05138855622320601, "learning_rate": 0.00019862885679729645, "loss": 0.853, "step": 20500 }, { "epoch": 0.3600835688828807, "grad_norm": 0.09435590937062452, "learning_rate": 0.00019862700794620958, "loss": 0.8593, "step": 20510 }, { "epoch": 0.3602591337628821, "grad_norm": 0.06812291235095772, "learning_rate": 0.0001986251578581697, "loss": 0.8647, "step": 20520 }, { "epoch": 0.36043469864288347, "grad_norm": 0.06524320192133441, "learning_rate": 0.00019862330653320042, "loss": 0.8504, "step": 20530 }, { "epoch": 0.3606102635228849, "grad_norm": 0.08785004000841906, "learning_rate": 0.0001986214539713251, "loss": 0.8526, "step": 20540 }, { "epoch": 0.3607858284028863, "grad_norm": 0.07101622580846258, "learning_rate": 0.00019861960017256733, "loss": 0.8634, "step": 20550 }, { "epoch": 0.3609613932828877, "grad_norm": 0.06330315061637946, "learning_rate": 0.00019861774513695043, "loss": 0.8511, "step": 20560 }, { "epoch": 0.3611369581628891, "grad_norm": 0.0652338557445132, "learning_rate": 0.00019861588886449803, "loss": 0.8681, "step": 20570 }, { "epoch": 0.3613125230428905, "grad_norm": 0.10066426516875414, "learning_rate": 0.00019861403135523363, "loss": 0.8515, "step": 20580 }, { "epoch": 0.3614880879228919, "grad_norm": 0.045605147875744316, "learning_rate": 0.00019861217260918072, "loss": 0.8552, "step": 20590 }, { "epoch": 0.3616636528028933, "grad_norm": 0.06137757647397811, "learning_rate": 0.00019861031262636286, "loss": 0.8543, "step": 20600 }, { "epoch": 0.36183921768289473, "grad_norm": 0.05466464941380702, "learning_rate": 0.00019860845140680367, "loss": 0.8609, "step": 20610 }, { "epoch": 0.3620147825628961, "grad_norm": 0.06211208453644534, "learning_rate": 0.00019860658895052668, "loss": 0.8542, "step": 20620 }, { "epoch": 0.3621903474428975, "grad_norm": 0.08191909271875586, "learning_rate": 0.0001986047252575555, "loss": 0.8585, "step": 20630 }, { "epoch": 0.36236591232289894, "grad_norm": 0.07477485575549135, "learning_rate": 0.0001986028603279137, "loss": 0.8566, "step": 20640 }, { "epoch": 0.3625414772029003, "grad_norm": 0.0609639343855269, "learning_rate": 0.00019860099416162501, "loss": 0.8624, "step": 20650 }, { "epoch": 0.36271704208290173, "grad_norm": 0.061352977298533236, "learning_rate": 0.00019859912675871302, "loss": 0.8591, "step": 20660 }, { "epoch": 0.36289260696290315, "grad_norm": 0.06861804094859952, "learning_rate": 0.00019859725811920137, "loss": 0.8555, "step": 20670 }, { "epoch": 0.3630681718429045, "grad_norm": 0.060921008196608596, "learning_rate": 0.00019859538824311377, "loss": 0.8583, "step": 20680 }, { "epoch": 0.36324373672290594, "grad_norm": 0.07007639029575313, "learning_rate": 0.00019859351713047386, "loss": 0.8622, "step": 20690 }, { "epoch": 0.36341930160290736, "grad_norm": 0.07136377048766915, "learning_rate": 0.00019859164478130544, "loss": 0.8601, "step": 20700 }, { "epoch": 0.3635948664829088, "grad_norm": 0.08082491643464151, "learning_rate": 0.00019858977119563214, "loss": 0.8547, "step": 20710 }, { "epoch": 0.36377043136291015, "grad_norm": 0.06168096771557879, "learning_rate": 0.0001985878963734777, "loss": 0.8566, "step": 20720 }, { "epoch": 0.3639459962429116, "grad_norm": 0.059272326271018645, "learning_rate": 0.00019858602031486597, "loss": 0.8503, "step": 20730 }, { "epoch": 0.364121561122913, "grad_norm": 0.07280361445766313, "learning_rate": 0.00019858414301982064, "loss": 0.8532, "step": 20740 }, { "epoch": 0.36429712600291436, "grad_norm": 0.06923731370291544, "learning_rate": 0.00019858226448836555, "loss": 0.8447, "step": 20750 }, { "epoch": 0.3644726908829158, "grad_norm": 0.09024439616340618, "learning_rate": 0.00019858038472052444, "loss": 0.8567, "step": 20760 }, { "epoch": 0.3646482557629172, "grad_norm": 0.06630720028593005, "learning_rate": 0.00019857850371632113, "loss": 0.8564, "step": 20770 }, { "epoch": 0.3648238206429186, "grad_norm": 0.059560027169951, "learning_rate": 0.0001985766214757795, "loss": 0.853, "step": 20780 }, { "epoch": 0.36499938552292, "grad_norm": 0.07936113175026589, "learning_rate": 0.00019857473799892337, "loss": 0.8593, "step": 20790 }, { "epoch": 0.3651749504029214, "grad_norm": 0.06009347204594745, "learning_rate": 0.00019857285328577659, "loss": 0.8549, "step": 20800 }, { "epoch": 0.3653505152829228, "grad_norm": 0.052488318479547436, "learning_rate": 0.0001985709673363631, "loss": 0.8625, "step": 20810 }, { "epoch": 0.3655260801629242, "grad_norm": 0.08310975066718011, "learning_rate": 0.0001985690801507067, "loss": 0.855, "step": 20820 }, { "epoch": 0.3657016450429256, "grad_norm": 0.09194066940430959, "learning_rate": 0.00019856719172883143, "loss": 0.8541, "step": 20830 }, { "epoch": 0.365877209922927, "grad_norm": 0.09419886219868105, "learning_rate": 0.00019856530207076108, "loss": 0.8552, "step": 20840 }, { "epoch": 0.3660527748029284, "grad_norm": 0.09113880642862149, "learning_rate": 0.0001985634111765197, "loss": 0.8521, "step": 20850 }, { "epoch": 0.36622833968292984, "grad_norm": 0.11714406946777135, "learning_rate": 0.00019856151904613117, "loss": 0.8486, "step": 20860 }, { "epoch": 0.3664039045629312, "grad_norm": 0.09361727897457355, "learning_rate": 0.00019855962567961953, "loss": 0.8549, "step": 20870 }, { "epoch": 0.3665794694429326, "grad_norm": 0.06762462946141516, "learning_rate": 0.00019855773107700867, "loss": 0.8615, "step": 20880 }, { "epoch": 0.36675503432293405, "grad_norm": 0.052170002929892864, "learning_rate": 0.00019855583523832273, "loss": 0.8635, "step": 20890 }, { "epoch": 0.36693059920293547, "grad_norm": 0.061353774280918726, "learning_rate": 0.00019855393816358563, "loss": 0.8541, "step": 20900 }, { "epoch": 0.36710616408293684, "grad_norm": 0.06546734598510343, "learning_rate": 0.00019855203985282146, "loss": 0.8658, "step": 20910 }, { "epoch": 0.36728172896293826, "grad_norm": 0.08629601450004108, "learning_rate": 0.0001985501403060542, "loss": 0.8616, "step": 20920 }, { "epoch": 0.3674572938429397, "grad_norm": 0.061456611201421756, "learning_rate": 0.00019854823952330798, "loss": 0.8641, "step": 20930 }, { "epoch": 0.36763285872294105, "grad_norm": 0.06567825956902973, "learning_rate": 0.0001985463375046069, "loss": 0.8591, "step": 20940 }, { "epoch": 0.36780842360294247, "grad_norm": 0.06449822421912224, "learning_rate": 0.00019854443424997505, "loss": 0.8625, "step": 20950 }, { "epoch": 0.3679839884829439, "grad_norm": 0.07966224644673892, "learning_rate": 0.00019854252975943647, "loss": 0.8585, "step": 20960 }, { "epoch": 0.36815955336294526, "grad_norm": 0.06390297806942022, "learning_rate": 0.0001985406240330154, "loss": 0.8594, "step": 20970 }, { "epoch": 0.3683351182429467, "grad_norm": 0.07063656669323085, "learning_rate": 0.00019853871707073585, "loss": 0.8569, "step": 20980 }, { "epoch": 0.3685106831229481, "grad_norm": 0.06747201721028045, "learning_rate": 0.00019853680887262212, "loss": 0.8534, "step": 20990 }, { "epoch": 0.36868624800294947, "grad_norm": 0.11369187023359388, "learning_rate": 0.00019853489943869832, "loss": 0.8575, "step": 21000 }, { "epoch": 0.3688618128829509, "grad_norm": 0.06647586403393553, "learning_rate": 0.00019853298876898863, "loss": 0.8511, "step": 21010 }, { "epoch": 0.3690373777629523, "grad_norm": 0.054195001151210224, "learning_rate": 0.0001985310768635173, "loss": 0.8544, "step": 21020 }, { "epoch": 0.3692129426429537, "grad_norm": 0.05363407736344607, "learning_rate": 0.00019852916372230852, "loss": 0.8645, "step": 21030 }, { "epoch": 0.3693885075229551, "grad_norm": 0.08611382219943738, "learning_rate": 0.0001985272493453866, "loss": 0.8636, "step": 21040 }, { "epoch": 0.3695640724029565, "grad_norm": 0.10107962828797619, "learning_rate": 0.00019852533373277567, "loss": 0.8601, "step": 21050 }, { "epoch": 0.36973963728295794, "grad_norm": 0.06454278566893266, "learning_rate": 0.0001985234168845001, "loss": 0.8569, "step": 21060 }, { "epoch": 0.3699152021629593, "grad_norm": 0.07356993685733872, "learning_rate": 0.00019852149880058412, "loss": 0.854, "step": 21070 }, { "epoch": 0.37009076704296073, "grad_norm": 0.051505103504157726, "learning_rate": 0.00019851957948105212, "loss": 0.8571, "step": 21080 }, { "epoch": 0.37026633192296216, "grad_norm": 0.06192958159405417, "learning_rate": 0.0001985176589259283, "loss": 0.8572, "step": 21090 }, { "epoch": 0.3704418968029635, "grad_norm": 0.06626773876582008, "learning_rate": 0.00019851573713523707, "loss": 0.8645, "step": 21100 }, { "epoch": 0.37061746168296494, "grad_norm": 0.08097625460621588, "learning_rate": 0.00019851381410900275, "loss": 0.8518, "step": 21110 }, { "epoch": 0.37079302656296637, "grad_norm": 0.09360490054101624, "learning_rate": 0.00019851188984724974, "loss": 0.8532, "step": 21120 }, { "epoch": 0.37096859144296773, "grad_norm": 0.08246888161615051, "learning_rate": 0.00019850996435000242, "loss": 0.8599, "step": 21130 }, { "epoch": 0.37114415632296915, "grad_norm": 0.068838910529555, "learning_rate": 0.00019850803761728512, "loss": 0.8493, "step": 21140 }, { "epoch": 0.3713197212029706, "grad_norm": 0.0782346845729182, "learning_rate": 0.00019850610964912234, "loss": 0.8554, "step": 21150 }, { "epoch": 0.37149528608297194, "grad_norm": 0.06917961834006454, "learning_rate": 0.00019850418044553843, "loss": 0.8468, "step": 21160 }, { "epoch": 0.37167085096297336, "grad_norm": 0.059836160652915284, "learning_rate": 0.00019850225000655786, "loss": 0.8561, "step": 21170 }, { "epoch": 0.3718464158429748, "grad_norm": 0.06287104467293583, "learning_rate": 0.0001985003183322051, "loss": 0.8478, "step": 21180 }, { "epoch": 0.37202198072297615, "grad_norm": 0.07072310927351382, "learning_rate": 0.00019849838542250462, "loss": 0.8567, "step": 21190 }, { "epoch": 0.3721975456029776, "grad_norm": 0.06533132003157183, "learning_rate": 0.0001984964512774809, "loss": 0.8669, "step": 21200 }, { "epoch": 0.372373110482979, "grad_norm": 0.07364924707063636, "learning_rate": 0.00019849451589715848, "loss": 0.8506, "step": 21210 }, { "epoch": 0.37254867536298036, "grad_norm": 0.0651331914491308, "learning_rate": 0.00019849257928156186, "loss": 0.8538, "step": 21220 }, { "epoch": 0.3727242402429818, "grad_norm": 0.07513384761459455, "learning_rate": 0.0001984906414307156, "loss": 0.8455, "step": 21230 }, { "epoch": 0.3728998051229832, "grad_norm": 0.06373187625050931, "learning_rate": 0.0001984887023446442, "loss": 0.8601, "step": 21240 }, { "epoch": 0.37307537000298463, "grad_norm": 0.05347574943371268, "learning_rate": 0.00019848676202337227, "loss": 0.8624, "step": 21250 }, { "epoch": 0.373250934882986, "grad_norm": 0.06623527392777927, "learning_rate": 0.00019848482046692435, "loss": 0.8561, "step": 21260 }, { "epoch": 0.3734264997629874, "grad_norm": 0.05664131753698621, "learning_rate": 0.00019848287767532506, "loss": 0.8676, "step": 21270 }, { "epoch": 0.37360206464298884, "grad_norm": 0.05824008572306137, "learning_rate": 0.0001984809336485991, "loss": 0.8597, "step": 21280 }, { "epoch": 0.3737776295229902, "grad_norm": 0.07433870865810295, "learning_rate": 0.00019847898838677095, "loss": 0.8462, "step": 21290 }, { "epoch": 0.37395319440299163, "grad_norm": 0.07267265418783667, "learning_rate": 0.0001984770418898654, "loss": 0.866, "step": 21300 }, { "epoch": 0.37412875928299305, "grad_norm": 0.0828913885238659, "learning_rate": 0.00019847509415790702, "loss": 0.8632, "step": 21310 }, { "epoch": 0.3743043241629944, "grad_norm": 0.060059736444323567, "learning_rate": 0.0001984731451909205, "loss": 0.8559, "step": 21320 }, { "epoch": 0.37447988904299584, "grad_norm": 0.06761030589463742, "learning_rate": 0.00019847119498893058, "loss": 0.8549, "step": 21330 }, { "epoch": 0.37465545392299726, "grad_norm": 0.06441231019183383, "learning_rate": 0.00019846924355196194, "loss": 0.8591, "step": 21340 }, { "epoch": 0.37483101880299863, "grad_norm": 0.09252416471506107, "learning_rate": 0.0001984672908800393, "loss": 0.8601, "step": 21350 }, { "epoch": 0.37500658368300005, "grad_norm": 0.07169519748551925, "learning_rate": 0.00019846533697318735, "loss": 0.8566, "step": 21360 }, { "epoch": 0.37518214856300147, "grad_norm": 0.05729009952544185, "learning_rate": 0.00019846338183143096, "loss": 0.8526, "step": 21370 }, { "epoch": 0.37535771344300284, "grad_norm": 0.04695375908852543, "learning_rate": 0.00019846142545479483, "loss": 0.866, "step": 21380 }, { "epoch": 0.37553327832300426, "grad_norm": 0.06351044343605845, "learning_rate": 0.00019845946784330376, "loss": 0.8544, "step": 21390 }, { "epoch": 0.3757088432030057, "grad_norm": 0.06420551934979944, "learning_rate": 0.00019845750899698258, "loss": 0.8588, "step": 21400 }, { "epoch": 0.37588440808300705, "grad_norm": 0.05098219700887573, "learning_rate": 0.00019845554891585604, "loss": 0.8703, "step": 21410 }, { "epoch": 0.37605997296300847, "grad_norm": 0.061368845639855486, "learning_rate": 0.00019845358759994905, "loss": 0.864, "step": 21420 }, { "epoch": 0.3762355378430099, "grad_norm": 0.06839721721981801, "learning_rate": 0.0001984516250492864, "loss": 0.8572, "step": 21430 }, { "epoch": 0.3764111027230113, "grad_norm": 0.09748133537404186, "learning_rate": 0.00019844966126389302, "loss": 0.8529, "step": 21440 }, { "epoch": 0.3765866676030127, "grad_norm": 0.09211973957222723, "learning_rate": 0.00019844769624379372, "loss": 0.861, "step": 21450 }, { "epoch": 0.3767622324830141, "grad_norm": 0.06414265671058293, "learning_rate": 0.00019844572998901344, "loss": 0.8548, "step": 21460 }, { "epoch": 0.3769377973630155, "grad_norm": 0.07306796978890948, "learning_rate": 0.0001984437624995771, "loss": 0.87, "step": 21470 }, { "epoch": 0.3771133622430169, "grad_norm": 0.06974604823330724, "learning_rate": 0.0001984417937755096, "loss": 0.8542, "step": 21480 }, { "epoch": 0.3772889271230183, "grad_norm": 0.0627619367922476, "learning_rate": 0.00019843982381683592, "loss": 0.8565, "step": 21490 }, { "epoch": 0.37746449200301974, "grad_norm": 0.06649293911201172, "learning_rate": 0.000198437852623581, "loss": 0.8527, "step": 21500 }, { "epoch": 0.3776400568830211, "grad_norm": 0.068434194766881, "learning_rate": 0.00019843588019576977, "loss": 0.8536, "step": 21510 }, { "epoch": 0.3778156217630225, "grad_norm": 0.07152553983546923, "learning_rate": 0.00019843390653342732, "loss": 0.8598, "step": 21520 }, { "epoch": 0.37799118664302395, "grad_norm": 0.07897481837040127, "learning_rate": 0.00019843193163657854, "loss": 0.8635, "step": 21530 }, { "epoch": 0.3781667515230253, "grad_norm": 0.0604294316830433, "learning_rate": 0.00019842995550524857, "loss": 0.8513, "step": 21540 }, { "epoch": 0.37834231640302673, "grad_norm": 0.12348898178664286, "learning_rate": 0.00019842797813946237, "loss": 0.859, "step": 21550 }, { "epoch": 0.37851788128302816, "grad_norm": 0.10442615986459526, "learning_rate": 0.000198425999539245, "loss": 0.8564, "step": 21560 }, { "epoch": 0.3786934461630295, "grad_norm": 0.0729075213909541, "learning_rate": 0.00019842401970462156, "loss": 0.8572, "step": 21570 }, { "epoch": 0.37886901104303095, "grad_norm": 0.06698044564629187, "learning_rate": 0.00019842203863561713, "loss": 0.8588, "step": 21580 }, { "epoch": 0.37904457592303237, "grad_norm": 0.05945893581426337, "learning_rate": 0.00019842005633225677, "loss": 0.8588, "step": 21590 }, { "epoch": 0.37922014080303373, "grad_norm": 0.07234279952243321, "learning_rate": 0.00019841807279456566, "loss": 0.8581, "step": 21600 }, { "epoch": 0.37939570568303516, "grad_norm": 0.06653919519593295, "learning_rate": 0.00019841608802256887, "loss": 0.8643, "step": 21610 }, { "epoch": 0.3795712705630366, "grad_norm": 0.08774200752167353, "learning_rate": 0.00019841410201629157, "loss": 0.8569, "step": 21620 }, { "epoch": 0.379746835443038, "grad_norm": 0.07013883991986533, "learning_rate": 0.00019841211477575896, "loss": 0.8544, "step": 21630 }, { "epoch": 0.37992240032303937, "grad_norm": 0.07454836373073823, "learning_rate": 0.00019841012630099616, "loss": 0.8547, "step": 21640 }, { "epoch": 0.3800979652030408, "grad_norm": 0.06913589721720555, "learning_rate": 0.0001984081365920284, "loss": 0.86, "step": 21650 }, { "epoch": 0.3802735300830422, "grad_norm": 0.09597324611263876, "learning_rate": 0.00019840614564888088, "loss": 0.8578, "step": 21660 }, { "epoch": 0.3804490949630436, "grad_norm": 0.05161470921538061, "learning_rate": 0.00019840415347157882, "loss": 0.8522, "step": 21670 }, { "epoch": 0.380624659843045, "grad_norm": 0.067454923160964, "learning_rate": 0.00019840216006014748, "loss": 0.8496, "step": 21680 }, { "epoch": 0.3808002247230464, "grad_norm": 0.06804149091312632, "learning_rate": 0.0001984001654146121, "loss": 0.852, "step": 21690 }, { "epoch": 0.3809757896030478, "grad_norm": 0.08742635083079356, "learning_rate": 0.00019839816953499796, "loss": 0.8524, "step": 21700 }, { "epoch": 0.3811513544830492, "grad_norm": 0.08243442393559469, "learning_rate": 0.00019839617242133038, "loss": 0.8592, "step": 21710 }, { "epoch": 0.38132691936305063, "grad_norm": 0.0658712765669098, "learning_rate": 0.00019839417407363456, "loss": 0.854, "step": 21720 }, { "epoch": 0.381502484243052, "grad_norm": 0.06042899135510992, "learning_rate": 0.00019839217449193594, "loss": 0.8555, "step": 21730 }, { "epoch": 0.3816780491230534, "grad_norm": 0.05263623340379115, "learning_rate": 0.0001983901736762598, "loss": 0.8575, "step": 21740 }, { "epoch": 0.38185361400305484, "grad_norm": 0.061425149239162875, "learning_rate": 0.00019838817162663147, "loss": 0.8562, "step": 21750 }, { "epoch": 0.3820291788830562, "grad_norm": 0.06723810192075078, "learning_rate": 0.00019838616834307632, "loss": 0.8561, "step": 21760 }, { "epoch": 0.38220474376305763, "grad_norm": 0.06120698886984292, "learning_rate": 0.0001983841638256198, "loss": 0.847, "step": 21770 }, { "epoch": 0.38238030864305905, "grad_norm": 0.05112279863930128, "learning_rate": 0.00019838215807428723, "loss": 0.853, "step": 21780 }, { "epoch": 0.3825558735230605, "grad_norm": 0.08475370036103823, "learning_rate": 0.0001983801510891041, "loss": 0.8541, "step": 21790 }, { "epoch": 0.38273143840306184, "grad_norm": 0.08191707729787495, "learning_rate": 0.00019837814287009578, "loss": 0.8581, "step": 21800 }, { "epoch": 0.38290700328306326, "grad_norm": 0.062182770724212605, "learning_rate": 0.0001983761334172877, "loss": 0.8662, "step": 21810 }, { "epoch": 0.3830825681630647, "grad_norm": 0.05955800143147683, "learning_rate": 0.00019837412273070534, "loss": 0.8691, "step": 21820 }, { "epoch": 0.38325813304306605, "grad_norm": 0.05968948344729945, "learning_rate": 0.00019837211081037424, "loss": 0.8573, "step": 21830 }, { "epoch": 0.3834336979230675, "grad_norm": 0.06967898929879139, "learning_rate": 0.00019837009765631978, "loss": 0.8516, "step": 21840 }, { "epoch": 0.3836092628030689, "grad_norm": 0.08202778179809123, "learning_rate": 0.00019836808326856753, "loss": 0.8575, "step": 21850 }, { "epoch": 0.38378482768307026, "grad_norm": 0.07447009445489626, "learning_rate": 0.00019836606764714302, "loss": 0.8619, "step": 21860 }, { "epoch": 0.3839603925630717, "grad_norm": 0.06088996716384084, "learning_rate": 0.00019836405079207175, "loss": 0.8566, "step": 21870 }, { "epoch": 0.3841359574430731, "grad_norm": 0.07542266248653058, "learning_rate": 0.00019836203270337933, "loss": 0.8632, "step": 21880 }, { "epoch": 0.3843115223230745, "grad_norm": 0.0575769895311918, "learning_rate": 0.00019836001338109128, "loss": 0.8512, "step": 21890 }, { "epoch": 0.3844870872030759, "grad_norm": 0.07302798104513017, "learning_rate": 0.00019835799282523318, "loss": 0.8643, "step": 21900 }, { "epoch": 0.3846626520830773, "grad_norm": 0.0777591160239891, "learning_rate": 0.00019835597103583068, "loss": 0.8596, "step": 21910 }, { "epoch": 0.3848382169630787, "grad_norm": 0.07105437521817472, "learning_rate": 0.00019835394801290937, "loss": 0.8633, "step": 21920 }, { "epoch": 0.3850137818430801, "grad_norm": 0.04866051436927219, "learning_rate": 0.00019835192375649487, "loss": 0.8382, "step": 21930 }, { "epoch": 0.3851893467230815, "grad_norm": 0.061221708716225454, "learning_rate": 0.00019834989826661285, "loss": 0.8593, "step": 21940 }, { "epoch": 0.3853649116030829, "grad_norm": 0.059668558885333914, "learning_rate": 0.00019834787154328893, "loss": 0.8589, "step": 21950 }, { "epoch": 0.3855404764830843, "grad_norm": 0.06464661351471321, "learning_rate": 0.00019834584358654887, "loss": 0.8585, "step": 21960 }, { "epoch": 0.38571604136308574, "grad_norm": 0.0699951449185999, "learning_rate": 0.00019834381439641827, "loss": 0.8567, "step": 21970 }, { "epoch": 0.38589160624308716, "grad_norm": 0.0766102167523704, "learning_rate": 0.00019834178397292292, "loss": 0.8565, "step": 21980 }, { "epoch": 0.3860671711230885, "grad_norm": 0.06317028557785732, "learning_rate": 0.00019833975231608848, "loss": 0.8562, "step": 21990 }, { "epoch": 0.38624273600308995, "grad_norm": 0.062228491986103335, "learning_rate": 0.00019833771942594076, "loss": 0.8518, "step": 22000 }, { "epoch": 0.38641830088309137, "grad_norm": 0.07941131441190327, "learning_rate": 0.00019833568530250545, "loss": 0.8634, "step": 22010 }, { "epoch": 0.38659386576309274, "grad_norm": 0.08101349752320992, "learning_rate": 0.00019833364994580835, "loss": 0.8598, "step": 22020 }, { "epoch": 0.38676943064309416, "grad_norm": 0.08730677003659564, "learning_rate": 0.00019833161335587528, "loss": 0.8576, "step": 22030 }, { "epoch": 0.3869449955230956, "grad_norm": 0.07674069550870512, "learning_rate": 0.000198329575532732, "loss": 0.8679, "step": 22040 }, { "epoch": 0.38712056040309695, "grad_norm": 0.08539629431414497, "learning_rate": 0.00019832753647640433, "loss": 0.8606, "step": 22050 }, { "epoch": 0.38729612528309837, "grad_norm": 0.06752679539515945, "learning_rate": 0.00019832549618691817, "loss": 0.8593, "step": 22060 }, { "epoch": 0.3874716901630998, "grad_norm": 0.08454307736531158, "learning_rate": 0.00019832345466429924, "loss": 0.8533, "step": 22070 }, { "epoch": 0.38764725504310116, "grad_norm": 0.08392256778495387, "learning_rate": 0.00019832141190857358, "loss": 0.8552, "step": 22080 }, { "epoch": 0.3878228199231026, "grad_norm": 0.07883678788043542, "learning_rate": 0.00019831936791976694, "loss": 0.8568, "step": 22090 }, { "epoch": 0.387998384803104, "grad_norm": 0.06000413353228194, "learning_rate": 0.0001983173226979052, "loss": 0.8474, "step": 22100 }, { "epoch": 0.38817394968310537, "grad_norm": 0.06310719080970618, "learning_rate": 0.00019831527624301437, "loss": 0.8515, "step": 22110 }, { "epoch": 0.3883495145631068, "grad_norm": 0.06069011599970246, "learning_rate": 0.00019831322855512037, "loss": 0.8522, "step": 22120 }, { "epoch": 0.3885250794431082, "grad_norm": 0.07004895507552614, "learning_rate": 0.00019831117963424908, "loss": 0.8578, "step": 22130 }, { "epoch": 0.3887006443231096, "grad_norm": 0.07915452781639763, "learning_rate": 0.00019830912948042652, "loss": 0.8547, "step": 22140 }, { "epoch": 0.388876209203111, "grad_norm": 0.0737491989109706, "learning_rate": 0.00019830707809367863, "loss": 0.858, "step": 22150 }, { "epoch": 0.3890517740831124, "grad_norm": 0.05073966065280343, "learning_rate": 0.00019830502547403143, "loss": 0.8622, "step": 22160 }, { "epoch": 0.38922733896311384, "grad_norm": 0.056876122013873404, "learning_rate": 0.00019830297162151087, "loss": 0.8551, "step": 22170 }, { "epoch": 0.3894029038431152, "grad_norm": 0.08502826017405937, "learning_rate": 0.000198300916536143, "loss": 0.8574, "step": 22180 }, { "epoch": 0.38957846872311663, "grad_norm": 0.07577126998531585, "learning_rate": 0.00019829886021795393, "loss": 0.8488, "step": 22190 }, { "epoch": 0.38975403360311806, "grad_norm": 0.06777178163113586, "learning_rate": 0.00019829680266696963, "loss": 0.8591, "step": 22200 }, { "epoch": 0.3899295984831194, "grad_norm": 0.0833149312326189, "learning_rate": 0.0001982947438832162, "loss": 0.8595, "step": 22210 }, { "epoch": 0.39010516336312084, "grad_norm": 0.09386928974660287, "learning_rate": 0.00019829268386671971, "loss": 0.857, "step": 22220 }, { "epoch": 0.39028072824312227, "grad_norm": 0.10050378435036543, "learning_rate": 0.00019829062261750627, "loss": 0.862, "step": 22230 }, { "epoch": 0.39045629312312363, "grad_norm": 0.05842977046027029, "learning_rate": 0.000198288560135602, "loss": 0.8676, "step": 22240 }, { "epoch": 0.39063185800312505, "grad_norm": 0.05776647969457122, "learning_rate": 0.00019828649642103302, "loss": 0.8602, "step": 22250 }, { "epoch": 0.3908074228831265, "grad_norm": 0.06435581739826694, "learning_rate": 0.00019828443147382546, "loss": 0.8607, "step": 22260 }, { "epoch": 0.39098298776312784, "grad_norm": 0.06046419926844219, "learning_rate": 0.00019828236529400556, "loss": 0.8581, "step": 22270 }, { "epoch": 0.39115855264312926, "grad_norm": 0.07230999646359845, "learning_rate": 0.0001982802978815994, "loss": 0.86, "step": 22280 }, { "epoch": 0.3913341175231307, "grad_norm": 0.07432619716115772, "learning_rate": 0.00019827822923663326, "loss": 0.8581, "step": 22290 }, { "epoch": 0.39150968240313205, "grad_norm": 0.06391885689081613, "learning_rate": 0.00019827615935913328, "loss": 0.853, "step": 22300 }, { "epoch": 0.3916852472831335, "grad_norm": 0.07544428942790078, "learning_rate": 0.00019827408824912575, "loss": 0.8641, "step": 22310 }, { "epoch": 0.3918608121631349, "grad_norm": 0.08299436477060301, "learning_rate": 0.00019827201590663684, "loss": 0.8575, "step": 22320 }, { "epoch": 0.39203637704313626, "grad_norm": 0.06463450008870716, "learning_rate": 0.0001982699423316929, "loss": 0.8593, "step": 22330 }, { "epoch": 0.3922119419231377, "grad_norm": 0.06684675988496451, "learning_rate": 0.00019826786752432009, "loss": 0.8563, "step": 22340 }, { "epoch": 0.3923875068031391, "grad_norm": 0.06631351982775205, "learning_rate": 0.0001982657914845448, "loss": 0.8604, "step": 22350 }, { "epoch": 0.39256307168314053, "grad_norm": 0.06694191320663952, "learning_rate": 0.00019826371421239328, "loss": 0.8561, "step": 22360 }, { "epoch": 0.3927386365631419, "grad_norm": 0.07105473389214038, "learning_rate": 0.00019826163570789185, "loss": 0.8534, "step": 22370 }, { "epoch": 0.3929142014431433, "grad_norm": 0.09865078226629627, "learning_rate": 0.00019825955597106687, "loss": 0.8633, "step": 22380 }, { "epoch": 0.39308976632314474, "grad_norm": 0.06563134360469504, "learning_rate": 0.00019825747500194467, "loss": 0.8516, "step": 22390 }, { "epoch": 0.3932653312031461, "grad_norm": 0.08067568616037082, "learning_rate": 0.0001982553928005516, "loss": 0.8506, "step": 22400 }, { "epoch": 0.39344089608314753, "grad_norm": 0.07496220565530316, "learning_rate": 0.0001982533093669141, "loss": 0.8496, "step": 22410 }, { "epoch": 0.39361646096314895, "grad_norm": 0.06445599681336384, "learning_rate": 0.0001982512247010585, "loss": 0.8565, "step": 22420 }, { "epoch": 0.3937920258431503, "grad_norm": 0.07657176962003114, "learning_rate": 0.00019824913880301127, "loss": 0.8491, "step": 22430 }, { "epoch": 0.39396759072315174, "grad_norm": 0.06882316680340718, "learning_rate": 0.00019824705167279883, "loss": 0.8539, "step": 22440 }, { "epoch": 0.39414315560315316, "grad_norm": 0.05671388949866655, "learning_rate": 0.00019824496331044756, "loss": 0.8563, "step": 22450 }, { "epoch": 0.39431872048315453, "grad_norm": 0.07974667067703264, "learning_rate": 0.000198242873715984, "loss": 0.8594, "step": 22460 }, { "epoch": 0.39449428536315595, "grad_norm": 0.08188042810165573, "learning_rate": 0.00019824078288943457, "loss": 0.8538, "step": 22470 }, { "epoch": 0.39466985024315737, "grad_norm": 0.0733855376192077, "learning_rate": 0.0001982386908308258, "loss": 0.8584, "step": 22480 }, { "epoch": 0.39484541512315874, "grad_norm": 0.06036357156630562, "learning_rate": 0.00019823659754018418, "loss": 0.8635, "step": 22490 }, { "epoch": 0.39502098000316016, "grad_norm": 0.06661629026615645, "learning_rate": 0.00019823450301753625, "loss": 0.8564, "step": 22500 }, { "epoch": 0.3951965448831616, "grad_norm": 0.06231781656975422, "learning_rate": 0.00019823240726290852, "loss": 0.8561, "step": 22510 }, { "epoch": 0.395372109763163, "grad_norm": 0.06096240379003488, "learning_rate": 0.00019823031027632756, "loss": 0.8565, "step": 22520 }, { "epoch": 0.39554767464316437, "grad_norm": 0.09393680630296437, "learning_rate": 0.00019822821205781993, "loss": 0.8535, "step": 22530 }, { "epoch": 0.3957232395231658, "grad_norm": 0.05414479707211924, "learning_rate": 0.0001982261126074122, "loss": 0.8596, "step": 22540 }, { "epoch": 0.3958988044031672, "grad_norm": 0.14926894859863835, "learning_rate": 0.000198224011925131, "loss": 0.8441, "step": 22550 }, { "epoch": 0.3960743692831686, "grad_norm": 0.059054374111373424, "learning_rate": 0.00019822191001100297, "loss": 0.8644, "step": 22560 }, { "epoch": 0.39624993416317, "grad_norm": 0.07342881158190863, "learning_rate": 0.00019821980686505466, "loss": 0.8573, "step": 22570 }, { "epoch": 0.3964254990431714, "grad_norm": 0.08730806487700825, "learning_rate": 0.00019821770248731283, "loss": 0.8601, "step": 22580 }, { "epoch": 0.3966010639231728, "grad_norm": 0.07579893119437758, "learning_rate": 0.000198215596877804, "loss": 0.8607, "step": 22590 }, { "epoch": 0.3967766288031742, "grad_norm": 0.06688473267349664, "learning_rate": 0.00019821349003655496, "loss": 0.8569, "step": 22600 }, { "epoch": 0.39695219368317564, "grad_norm": 0.12019625690946904, "learning_rate": 0.00019821138196359235, "loss": 0.8521, "step": 22610 }, { "epoch": 0.397127758563177, "grad_norm": 0.07462574683190502, "learning_rate": 0.00019820927265894295, "loss": 0.859, "step": 22620 }, { "epoch": 0.3973033234431784, "grad_norm": 0.08351665320837431, "learning_rate": 0.0001982071621226334, "loss": 0.8663, "step": 22630 }, { "epoch": 0.39747888832317985, "grad_norm": 0.06282382221284032, "learning_rate": 0.0001982050503546905, "loss": 0.85, "step": 22640 }, { "epoch": 0.3976544532031812, "grad_norm": 0.0761864320388309, "learning_rate": 0.00019820293735514093, "loss": 0.8541, "step": 22650 }, { "epoch": 0.39783001808318263, "grad_norm": 0.09154468750326782, "learning_rate": 0.00019820082312401157, "loss": 0.8494, "step": 22660 }, { "epoch": 0.39800558296318406, "grad_norm": 0.06062876496083987, "learning_rate": 0.00019819870766132913, "loss": 0.8577, "step": 22670 }, { "epoch": 0.3981811478431854, "grad_norm": 0.06463480748692994, "learning_rate": 0.0001981965909671204, "loss": 0.8555, "step": 22680 }, { "epoch": 0.39835671272318685, "grad_norm": 0.06335097480852701, "learning_rate": 0.0001981944730414123, "loss": 0.8605, "step": 22690 }, { "epoch": 0.39853227760318827, "grad_norm": 0.05695027002412183, "learning_rate": 0.00019819235388423157, "loss": 0.8648, "step": 22700 }, { "epoch": 0.3987078424831897, "grad_norm": 0.0838035235423582, "learning_rate": 0.0001981902334956051, "loss": 0.8485, "step": 22710 }, { "epoch": 0.39888340736319106, "grad_norm": 0.07827330896906573, "learning_rate": 0.00019818811187555975, "loss": 0.854, "step": 22720 }, { "epoch": 0.3990589722431925, "grad_norm": 0.09509872634496237, "learning_rate": 0.00019818598902412237, "loss": 0.8533, "step": 22730 }, { "epoch": 0.3992345371231939, "grad_norm": 0.06138830162742917, "learning_rate": 0.0001981838649413199, "loss": 0.8575, "step": 22740 }, { "epoch": 0.39941010200319527, "grad_norm": 0.06111454213300817, "learning_rate": 0.00019818173962717924, "loss": 0.859, "step": 22750 }, { "epoch": 0.3995856668831967, "grad_norm": 0.06751742782045675, "learning_rate": 0.00019817961308172727, "loss": 0.8699, "step": 22760 }, { "epoch": 0.3997612317631981, "grad_norm": 0.059671829447977956, "learning_rate": 0.00019817748530499103, "loss": 0.8533, "step": 22770 }, { "epoch": 0.3999367966431995, "grad_norm": 0.047728806272552125, "learning_rate": 0.00019817535629699742, "loss": 0.861, "step": 22780 }, { "epoch": 0.4001123615232009, "grad_norm": 0.08476539347533435, "learning_rate": 0.00019817322605777339, "loss": 0.8515, "step": 22790 }, { "epoch": 0.4002879264032023, "grad_norm": 0.06427466247493895, "learning_rate": 0.00019817109458734598, "loss": 0.8586, "step": 22800 }, { "epoch": 0.4004634912832037, "grad_norm": 0.058135872337657137, "learning_rate": 0.0001981689618857422, "loss": 0.8649, "step": 22810 }, { "epoch": 0.4006390561632051, "grad_norm": 0.05866619117213911, "learning_rate": 0.00019816682795298905, "loss": 0.8576, "step": 22820 }, { "epoch": 0.40081462104320653, "grad_norm": 0.1164585560052983, "learning_rate": 0.00019816469278911356, "loss": 0.8561, "step": 22830 }, { "epoch": 0.4009901859232079, "grad_norm": 0.05889345340400301, "learning_rate": 0.00019816255639414277, "loss": 0.8581, "step": 22840 }, { "epoch": 0.4011657508032093, "grad_norm": 0.07101314173848436, "learning_rate": 0.0001981604187681038, "loss": 0.8677, "step": 22850 }, { "epoch": 0.40134131568321074, "grad_norm": 0.0635159803219572, "learning_rate": 0.00019815827991102368, "loss": 0.8463, "step": 22860 }, { "epoch": 0.4015168805632121, "grad_norm": 0.06035743295557798, "learning_rate": 0.00019815613982292953, "loss": 0.8527, "step": 22870 }, { "epoch": 0.40169244544321353, "grad_norm": 0.06510149830873506, "learning_rate": 0.0001981539985038485, "loss": 0.8608, "step": 22880 }, { "epoch": 0.40186801032321495, "grad_norm": 0.08104299272364077, "learning_rate": 0.00019815185595380765, "loss": 0.8584, "step": 22890 }, { "epoch": 0.4020435752032164, "grad_norm": 0.08294908559809008, "learning_rate": 0.0001981497121728342, "loss": 0.8553, "step": 22900 }, { "epoch": 0.40221914008321774, "grad_norm": 0.054311103935317795, "learning_rate": 0.00019814756716095527, "loss": 0.8617, "step": 22910 }, { "epoch": 0.40239470496321916, "grad_norm": 0.06874681358245362, "learning_rate": 0.00019814542091819802, "loss": 0.848, "step": 22920 }, { "epoch": 0.4025702698432206, "grad_norm": 0.08566011020106062, "learning_rate": 0.0001981432734445897, "loss": 0.8619, "step": 22930 }, { "epoch": 0.40274583472322195, "grad_norm": 0.06129256068942554, "learning_rate": 0.00019814112474015748, "loss": 0.863, "step": 22940 }, { "epoch": 0.4029213996032234, "grad_norm": 0.06576799839777153, "learning_rate": 0.00019813897480492856, "loss": 0.8493, "step": 22950 }, { "epoch": 0.4030969644832248, "grad_norm": 0.06379649249292493, "learning_rate": 0.00019813682363893024, "loss": 0.8494, "step": 22960 }, { "epoch": 0.40327252936322616, "grad_norm": 0.05767916102258527, "learning_rate": 0.00019813467124218974, "loss": 0.8577, "step": 22970 }, { "epoch": 0.4034480942432276, "grad_norm": 0.09955837052707728, "learning_rate": 0.00019813251761473437, "loss": 0.8577, "step": 22980 }, { "epoch": 0.403623659123229, "grad_norm": 0.07192973605523031, "learning_rate": 0.00019813036275659137, "loss": 0.8677, "step": 22990 }, { "epoch": 0.4037992240032304, "grad_norm": 0.07040881016822204, "learning_rate": 0.00019812820666778805, "loss": 0.8615, "step": 23000 }, { "epoch": 0.4039747888832318, "grad_norm": 0.06434516149443532, "learning_rate": 0.00019812604934835176, "loss": 0.859, "step": 23010 }, { "epoch": 0.4041503537632332, "grad_norm": 0.0728057841446281, "learning_rate": 0.0001981238907983098, "loss": 0.8629, "step": 23020 }, { "epoch": 0.4043259186432346, "grad_norm": 0.06446347140312143, "learning_rate": 0.0001981217310176895, "loss": 0.856, "step": 23030 }, { "epoch": 0.404501483523236, "grad_norm": 0.06610854442636781, "learning_rate": 0.00019811957000651827, "loss": 0.8616, "step": 23040 }, { "epoch": 0.4046770484032374, "grad_norm": 0.07597897873744168, "learning_rate": 0.0001981174077648235, "loss": 0.8611, "step": 23050 }, { "epoch": 0.4048526132832388, "grad_norm": 0.08304798114336405, "learning_rate": 0.00019811524429263252, "loss": 0.856, "step": 23060 }, { "epoch": 0.4050281781632402, "grad_norm": 0.06690170552580567, "learning_rate": 0.0001981130795899728, "loss": 0.8633, "step": 23070 }, { "epoch": 0.40520374304324164, "grad_norm": 0.06214814791360192, "learning_rate": 0.00019811091365687174, "loss": 0.8597, "step": 23080 }, { "epoch": 0.40537930792324306, "grad_norm": 0.0682531381496847, "learning_rate": 0.00019810874649335683, "loss": 0.8604, "step": 23090 }, { "epoch": 0.4055548728032444, "grad_norm": 0.07244788784277467, "learning_rate": 0.00019810657809945546, "loss": 0.8569, "step": 23100 }, { "epoch": 0.40573043768324585, "grad_norm": 0.053262554953853104, "learning_rate": 0.00019810440847519516, "loss": 0.8486, "step": 23110 }, { "epoch": 0.40590600256324727, "grad_norm": 0.06049155014562685, "learning_rate": 0.0001981022376206034, "loss": 0.8599, "step": 23120 }, { "epoch": 0.40608156744324864, "grad_norm": 0.07696558092019924, "learning_rate": 0.00019810006553570763, "loss": 0.8545, "step": 23130 }, { "epoch": 0.40625713232325006, "grad_norm": 0.08459049347914498, "learning_rate": 0.00019809789222053546, "loss": 0.8471, "step": 23140 }, { "epoch": 0.4064326972032515, "grad_norm": 0.07671822724747164, "learning_rate": 0.00019809571767511437, "loss": 0.8593, "step": 23150 }, { "epoch": 0.40660826208325285, "grad_norm": 0.08290844686529619, "learning_rate": 0.00019809354189947195, "loss": 0.8521, "step": 23160 }, { "epoch": 0.40678382696325427, "grad_norm": 0.06079841000562817, "learning_rate": 0.00019809136489363574, "loss": 0.8587, "step": 23170 }, { "epoch": 0.4069593918432557, "grad_norm": 0.06199859737677302, "learning_rate": 0.00019808918665763329, "loss": 0.8634, "step": 23180 }, { "epoch": 0.40713495672325706, "grad_norm": 0.06752971372726763, "learning_rate": 0.00019808700719149228, "loss": 0.8534, "step": 23190 }, { "epoch": 0.4073105216032585, "grad_norm": 0.05583540013997792, "learning_rate": 0.00019808482649524029, "loss": 0.8592, "step": 23200 }, { "epoch": 0.4074860864832599, "grad_norm": 0.08183497137954088, "learning_rate": 0.0001980826445689049, "loss": 0.8527, "step": 23210 }, { "epoch": 0.40766165136326127, "grad_norm": 0.06773310707747451, "learning_rate": 0.00019808046141251383, "loss": 0.8553, "step": 23220 }, { "epoch": 0.4078372162432627, "grad_norm": 0.06276681207705936, "learning_rate": 0.0001980782770260947, "loss": 0.8567, "step": 23230 }, { "epoch": 0.4080127811232641, "grad_norm": 0.06662299174138643, "learning_rate": 0.00019807609140967518, "loss": 0.8571, "step": 23240 }, { "epoch": 0.40818834600326553, "grad_norm": 0.08451406513935299, "learning_rate": 0.00019807390456328297, "loss": 0.8571, "step": 23250 }, { "epoch": 0.4083639108832669, "grad_norm": 0.055398298747036834, "learning_rate": 0.00019807171648694583, "loss": 0.8476, "step": 23260 }, { "epoch": 0.4085394757632683, "grad_norm": 0.07378088655209167, "learning_rate": 0.00019806952718069143, "loss": 0.8567, "step": 23270 }, { "epoch": 0.40871504064326974, "grad_norm": 0.07688034950647044, "learning_rate": 0.00019806733664454748, "loss": 0.8696, "step": 23280 }, { "epoch": 0.4088906055232711, "grad_norm": 0.057814325841919374, "learning_rate": 0.0001980651448785418, "loss": 0.8604, "step": 23290 }, { "epoch": 0.40906617040327253, "grad_norm": 0.07056077654422281, "learning_rate": 0.00019806295188270215, "loss": 0.8577, "step": 23300 }, { "epoch": 0.40924173528327396, "grad_norm": 0.05818722992951602, "learning_rate": 0.00019806075765705625, "loss": 0.8459, "step": 23310 }, { "epoch": 0.4094173001632753, "grad_norm": 0.08275198239650826, "learning_rate": 0.000198058562201632, "loss": 0.8522, "step": 23320 }, { "epoch": 0.40959286504327674, "grad_norm": 0.06688531292575486, "learning_rate": 0.0001980563655164571, "loss": 0.8532, "step": 23330 }, { "epoch": 0.40976842992327817, "grad_norm": 0.0554119671518036, "learning_rate": 0.00019805416760155947, "loss": 0.8577, "step": 23340 }, { "epoch": 0.40994399480327953, "grad_norm": 0.052037903308334826, "learning_rate": 0.000198051968456967, "loss": 0.8611, "step": 23350 }, { "epoch": 0.41011955968328095, "grad_norm": 0.08773803305464657, "learning_rate": 0.0001980497680827074, "loss": 0.8583, "step": 23360 }, { "epoch": 0.4102951245632824, "grad_norm": 0.10496633317947089, "learning_rate": 0.00019804756647880868, "loss": 0.8664, "step": 23370 }, { "epoch": 0.41047068944328374, "grad_norm": 0.07247083140870463, "learning_rate": 0.0001980453636452987, "loss": 0.8525, "step": 23380 }, { "epoch": 0.41064625432328516, "grad_norm": 0.05729821609218918, "learning_rate": 0.00019804315958220537, "loss": 0.8538, "step": 23390 }, { "epoch": 0.4108218192032866, "grad_norm": 0.08807434262535596, "learning_rate": 0.0001980409542895566, "loss": 0.8604, "step": 23400 }, { "epoch": 0.41099738408328795, "grad_norm": 0.07372357930039504, "learning_rate": 0.00019803874776738034, "loss": 0.8563, "step": 23410 }, { "epoch": 0.4111729489632894, "grad_norm": 0.0983103111612344, "learning_rate": 0.00019803654001570456, "loss": 0.8482, "step": 23420 }, { "epoch": 0.4113485138432908, "grad_norm": 0.0633742441224773, "learning_rate": 0.0001980343310345572, "loss": 0.8551, "step": 23430 }, { "epoch": 0.4115240787232922, "grad_norm": 0.06111901912931253, "learning_rate": 0.00019803212082396626, "loss": 0.8564, "step": 23440 }, { "epoch": 0.4116996436032936, "grad_norm": 0.06890547478422795, "learning_rate": 0.00019802990938395977, "loss": 0.8615, "step": 23450 }, { "epoch": 0.411875208483295, "grad_norm": 0.06676508183337429, "learning_rate": 0.00019802769671456577, "loss": 0.8573, "step": 23460 }, { "epoch": 0.41205077336329643, "grad_norm": 0.057337838080242474, "learning_rate": 0.0001980254828158122, "loss": 0.8526, "step": 23470 }, { "epoch": 0.4122263382432978, "grad_norm": 0.06936469850866271, "learning_rate": 0.0001980232676877272, "loss": 0.8556, "step": 23480 }, { "epoch": 0.4124019031232992, "grad_norm": 0.07213550100036975, "learning_rate": 0.0001980210513303388, "loss": 0.8565, "step": 23490 }, { "epoch": 0.41257746800330064, "grad_norm": 0.06588595064391055, "learning_rate": 0.00019801883374367507, "loss": 0.852, "step": 23500 }, { "epoch": 0.412753032883302, "grad_norm": 0.06792203364631942, "learning_rate": 0.00019801661492776411, "loss": 0.8616, "step": 23510 }, { "epoch": 0.41292859776330343, "grad_norm": 0.06392304677940171, "learning_rate": 0.0001980143948826341, "loss": 0.857, "step": 23520 }, { "epoch": 0.41310416264330485, "grad_norm": 0.06556644318623903, "learning_rate": 0.00019801217360831307, "loss": 0.8504, "step": 23530 }, { "epoch": 0.4132797275233062, "grad_norm": 0.058328382888417414, "learning_rate": 0.00019800995110482922, "loss": 0.8546, "step": 23540 }, { "epoch": 0.41345529240330764, "grad_norm": 0.04814563181147783, "learning_rate": 0.0001980077273722107, "loss": 0.858, "step": 23550 }, { "epoch": 0.41363085728330906, "grad_norm": 0.0778680371884089, "learning_rate": 0.00019800550241048568, "loss": 0.8562, "step": 23560 }, { "epoch": 0.41380642216331043, "grad_norm": 0.07468572742662002, "learning_rate": 0.00019800327621968236, "loss": 0.852, "step": 23570 }, { "epoch": 0.41398198704331185, "grad_norm": 0.05148370277107974, "learning_rate": 0.0001980010487998289, "loss": 0.855, "step": 23580 }, { "epoch": 0.41415755192331327, "grad_norm": 0.0996252565913877, "learning_rate": 0.00019799882015095362, "loss": 0.8484, "step": 23590 }, { "epoch": 0.41433311680331464, "grad_norm": 0.09525798434414152, "learning_rate": 0.00019799659027308465, "loss": 0.8574, "step": 23600 }, { "epoch": 0.41450868168331606, "grad_norm": 0.10306924100013512, "learning_rate": 0.00019799435916625032, "loss": 0.8542, "step": 23610 }, { "epoch": 0.4146842465633175, "grad_norm": 0.06390334941123062, "learning_rate": 0.0001979921268304789, "loss": 0.8524, "step": 23620 }, { "epoch": 0.4148598114433189, "grad_norm": 0.07701891841391194, "learning_rate": 0.0001979898932657986, "loss": 0.85, "step": 23630 }, { "epoch": 0.41503537632332027, "grad_norm": 0.09441398741056316, "learning_rate": 0.00019798765847223778, "loss": 0.8486, "step": 23640 }, { "epoch": 0.4152109412033217, "grad_norm": 0.05119082010188207, "learning_rate": 0.00019798542244982474, "loss": 0.8615, "step": 23650 }, { "epoch": 0.4153865060833231, "grad_norm": 0.05558081175280894, "learning_rate": 0.0001979831851985878, "loss": 0.8568, "step": 23660 }, { "epoch": 0.4155620709633245, "grad_norm": 0.06970046611831716, "learning_rate": 0.00019798094671855533, "loss": 0.8566, "step": 23670 }, { "epoch": 0.4157376358433259, "grad_norm": 0.07299640180291771, "learning_rate": 0.00019797870700975564, "loss": 0.8594, "step": 23680 }, { "epoch": 0.4159132007233273, "grad_norm": 0.06899319342372974, "learning_rate": 0.0001979764660722172, "loss": 0.8503, "step": 23690 }, { "epoch": 0.4160887656033287, "grad_norm": 0.07570135450171334, "learning_rate": 0.00019797422390596832, "loss": 0.8531, "step": 23700 }, { "epoch": 0.4162643304833301, "grad_norm": 0.06165927780660987, "learning_rate": 0.00019797198051103745, "loss": 0.8657, "step": 23710 }, { "epoch": 0.41643989536333154, "grad_norm": 0.07822418431374392, "learning_rate": 0.00019796973588745299, "loss": 0.866, "step": 23720 }, { "epoch": 0.4166154602433329, "grad_norm": 0.05807130075435765, "learning_rate": 0.0001979674900352434, "loss": 0.8645, "step": 23730 }, { "epoch": 0.4167910251233343, "grad_norm": 0.04995825621534916, "learning_rate": 0.00019796524295443712, "loss": 0.849, "step": 23740 }, { "epoch": 0.41696659000333575, "grad_norm": 0.06756585321124141, "learning_rate": 0.00019796299464506264, "loss": 0.8536, "step": 23750 }, { "epoch": 0.4171421548833371, "grad_norm": 0.06853167609873134, "learning_rate": 0.0001979607451071484, "loss": 0.8577, "step": 23760 }, { "epoch": 0.41731771976333853, "grad_norm": 0.07273940248672528, "learning_rate": 0.000197958494340723, "loss": 0.8583, "step": 23770 }, { "epoch": 0.41749328464333996, "grad_norm": 0.049919461322572155, "learning_rate": 0.00019795624234581486, "loss": 0.8582, "step": 23780 }, { "epoch": 0.4176688495233413, "grad_norm": 0.0563739857635172, "learning_rate": 0.00019795398912245254, "loss": 0.8559, "step": 23790 }, { "epoch": 0.41784441440334275, "grad_norm": 0.07150476180406265, "learning_rate": 0.00019795173467066458, "loss": 0.8554, "step": 23800 }, { "epoch": 0.41801997928334417, "grad_norm": 0.09252759659178214, "learning_rate": 0.00019794947899047963, "loss": 0.857, "step": 23810 }, { "epoch": 0.4181955441633456, "grad_norm": 0.06186291373486963, "learning_rate": 0.00019794722208192617, "loss": 0.8532, "step": 23820 }, { "epoch": 0.41837110904334696, "grad_norm": 0.11780658268729939, "learning_rate": 0.00019794496394503276, "loss": 0.8605, "step": 23830 }, { "epoch": 0.4185466739233484, "grad_norm": 0.0690304600417895, "learning_rate": 0.00019794270457982818, "loss": 0.8611, "step": 23840 }, { "epoch": 0.4187222388033498, "grad_norm": 0.0663812311824687, "learning_rate": 0.00019794044398634088, "loss": 0.8556, "step": 23850 }, { "epoch": 0.41889780368335117, "grad_norm": 0.11918954700709052, "learning_rate": 0.0001979381821645996, "loss": 0.8617, "step": 23860 }, { "epoch": 0.4190733685633526, "grad_norm": 0.07570436378591087, "learning_rate": 0.00019793591911463298, "loss": 0.853, "step": 23870 }, { "epoch": 0.419248933443354, "grad_norm": 0.07316220843400868, "learning_rate": 0.00019793365483646968, "loss": 0.8539, "step": 23880 }, { "epoch": 0.4194244983233554, "grad_norm": 0.06552309597771855, "learning_rate": 0.0001979313893301384, "loss": 0.8576, "step": 23890 }, { "epoch": 0.4196000632033568, "grad_norm": 0.07657497613704695, "learning_rate": 0.00019792912259566784, "loss": 0.8537, "step": 23900 }, { "epoch": 0.4197756280833582, "grad_norm": 0.06425175581475873, "learning_rate": 0.0001979268546330867, "loss": 0.8521, "step": 23910 }, { "epoch": 0.4199511929633596, "grad_norm": 0.0859316711663665, "learning_rate": 0.00019792458544242377, "loss": 0.8533, "step": 23920 }, { "epoch": 0.420126757843361, "grad_norm": 0.06280677874823465, "learning_rate": 0.00019792231502370778, "loss": 0.8522, "step": 23930 }, { "epoch": 0.42030232272336243, "grad_norm": 0.07362405229663191, "learning_rate": 0.00019792004337696744, "loss": 0.855, "step": 23940 }, { "epoch": 0.4204778876033638, "grad_norm": 0.10061992476206615, "learning_rate": 0.00019791777050223158, "loss": 0.8511, "step": 23950 }, { "epoch": 0.4206534524833652, "grad_norm": 0.06408570687176796, "learning_rate": 0.00019791549639952902, "loss": 0.8502, "step": 23960 }, { "epoch": 0.42082901736336664, "grad_norm": 0.06815544627426995, "learning_rate": 0.0001979132210688885, "loss": 0.8522, "step": 23970 }, { "epoch": 0.42100458224336806, "grad_norm": 0.07302871359728337, "learning_rate": 0.00019791094451033895, "loss": 0.8537, "step": 23980 }, { "epoch": 0.42118014712336943, "grad_norm": 0.06933045327223912, "learning_rate": 0.00019790866672390913, "loss": 0.8615, "step": 23990 }, { "epoch": 0.42135571200337085, "grad_norm": 0.09606811423185657, "learning_rate": 0.00019790638770962794, "loss": 0.8451, "step": 24000 }, { "epoch": 0.4215312768833723, "grad_norm": 0.08293317891856797, "learning_rate": 0.00019790410746752427, "loss": 0.8557, "step": 24010 }, { "epoch": 0.42170684176337364, "grad_norm": 0.08624193602795534, "learning_rate": 0.00019790182599762693, "loss": 0.8536, "step": 24020 }, { "epoch": 0.42188240664337506, "grad_norm": 0.06311840964815427, "learning_rate": 0.0001978995432999649, "loss": 0.8555, "step": 24030 }, { "epoch": 0.4220579715233765, "grad_norm": 0.05856072285662311, "learning_rate": 0.00019789725937456715, "loss": 0.8514, "step": 24040 }, { "epoch": 0.42223353640337785, "grad_norm": 0.059146073676464815, "learning_rate": 0.00019789497422146248, "loss": 0.8561, "step": 24050 }, { "epoch": 0.4224091012833793, "grad_norm": 0.07215384845719484, "learning_rate": 0.00019789268784067994, "loss": 0.8539, "step": 24060 }, { "epoch": 0.4225846661633807, "grad_norm": 0.0641524063642619, "learning_rate": 0.00019789040023224848, "loss": 0.8577, "step": 24070 }, { "epoch": 0.42276023104338206, "grad_norm": 0.06857747151826586, "learning_rate": 0.00019788811139619708, "loss": 0.8555, "step": 24080 }, { "epoch": 0.4229357959233835, "grad_norm": 0.0622270363425274, "learning_rate": 0.00019788582133255472, "loss": 0.8623, "step": 24090 }, { "epoch": 0.4231113608033849, "grad_norm": 0.05556365992968483, "learning_rate": 0.00019788353004135045, "loss": 0.8534, "step": 24100 }, { "epoch": 0.4232869256833863, "grad_norm": 0.06184879503846866, "learning_rate": 0.00019788123752261327, "loss": 0.8519, "step": 24110 }, { "epoch": 0.4234624905633877, "grad_norm": 0.08369938904904543, "learning_rate": 0.00019787894377637226, "loss": 0.8544, "step": 24120 }, { "epoch": 0.4236380554433891, "grad_norm": 0.06664902020457696, "learning_rate": 0.0001978766488026565, "loss": 0.8552, "step": 24130 }, { "epoch": 0.4238136203233905, "grad_norm": 0.07079367411446787, "learning_rate": 0.000197874352601495, "loss": 0.8606, "step": 24140 }, { "epoch": 0.4239891852033919, "grad_norm": 0.07556567699870627, "learning_rate": 0.00019787205517291689, "loss": 0.8575, "step": 24150 }, { "epoch": 0.4241647500833933, "grad_norm": 0.07569680511052704, "learning_rate": 0.00019786975651695126, "loss": 0.849, "step": 24160 }, { "epoch": 0.42434031496339475, "grad_norm": 0.08157585971048129, "learning_rate": 0.00019786745663362727, "loss": 0.8575, "step": 24170 }, { "epoch": 0.4245158798433961, "grad_norm": 0.046014629669633336, "learning_rate": 0.00019786515552297403, "loss": 0.8566, "step": 24180 }, { "epoch": 0.42469144472339754, "grad_norm": 0.0661294329747174, "learning_rate": 0.0001978628531850207, "loss": 0.8569, "step": 24190 }, { "epoch": 0.42486700960339896, "grad_norm": 0.07763000325479355, "learning_rate": 0.00019786054961979646, "loss": 0.8464, "step": 24200 }, { "epoch": 0.4250425744834003, "grad_norm": 0.07449889897229006, "learning_rate": 0.0001978582448273305, "loss": 0.8556, "step": 24210 }, { "epoch": 0.42521813936340175, "grad_norm": 0.05360801043814137, "learning_rate": 0.00019785593880765202, "loss": 0.8596, "step": 24220 }, { "epoch": 0.42539370424340317, "grad_norm": 0.06589127621124076, "learning_rate": 0.00019785363156079021, "loss": 0.8566, "step": 24230 }, { "epoch": 0.42556926912340454, "grad_norm": 0.06407633192586366, "learning_rate": 0.00019785132308677438, "loss": 0.8507, "step": 24240 }, { "epoch": 0.42574483400340596, "grad_norm": 0.07199896498509271, "learning_rate": 0.00019784901338563363, "loss": 0.8514, "step": 24250 }, { "epoch": 0.4259203988834074, "grad_norm": 0.05660652428213764, "learning_rate": 0.0001978467024573974, "loss": 0.8595, "step": 24260 }, { "epoch": 0.42609596376340875, "grad_norm": 0.05541259350959857, "learning_rate": 0.00019784439030209484, "loss": 0.8508, "step": 24270 }, { "epoch": 0.42627152864341017, "grad_norm": 0.05970333823542039, "learning_rate": 0.00019784207691975534, "loss": 0.867, "step": 24280 }, { "epoch": 0.4264470935234116, "grad_norm": 0.06804668792554719, "learning_rate": 0.00019783976231040816, "loss": 0.8565, "step": 24290 }, { "epoch": 0.42662265840341296, "grad_norm": 0.10429915867835482, "learning_rate": 0.0001978374464740826, "loss": 0.8483, "step": 24300 }, { "epoch": 0.4267982232834144, "grad_norm": 0.05944180200713313, "learning_rate": 0.00019783512941080807, "loss": 0.8538, "step": 24310 }, { "epoch": 0.4269737881634158, "grad_norm": 0.05330582456695055, "learning_rate": 0.00019783281112061384, "loss": 0.8638, "step": 24320 }, { "epoch": 0.42714935304341717, "grad_norm": 0.05397754504394192, "learning_rate": 0.0001978304916035294, "loss": 0.8604, "step": 24330 }, { "epoch": 0.4273249179234186, "grad_norm": 0.05588666231909027, "learning_rate": 0.00019782817085958404, "loss": 0.8606, "step": 24340 }, { "epoch": 0.42750048280342, "grad_norm": 0.08578383176992048, "learning_rate": 0.0001978258488888072, "loss": 0.8574, "step": 24350 }, { "epoch": 0.42767604768342143, "grad_norm": 0.06438015996916513, "learning_rate": 0.0001978235256912283, "loss": 0.8559, "step": 24360 }, { "epoch": 0.4278516125634228, "grad_norm": 0.07389407006925401, "learning_rate": 0.00019782120126687674, "loss": 0.8598, "step": 24370 }, { "epoch": 0.4280271774434242, "grad_norm": 0.0719480130256265, "learning_rate": 0.00019781887561578208, "loss": 0.8595, "step": 24380 }, { "epoch": 0.42820274232342564, "grad_norm": 0.06631055254040422, "learning_rate": 0.00019781654873797367, "loss": 0.8577, "step": 24390 }, { "epoch": 0.428378307203427, "grad_norm": 0.06158103187432808, "learning_rate": 0.00019781422063348104, "loss": 0.8492, "step": 24400 }, { "epoch": 0.42855387208342843, "grad_norm": 0.05956110387560395, "learning_rate": 0.00019781189130233366, "loss": 0.8576, "step": 24410 }, { "epoch": 0.42872943696342986, "grad_norm": 0.08201524126342943, "learning_rate": 0.00019780956074456107, "loss": 0.8527, "step": 24420 }, { "epoch": 0.4289050018434312, "grad_norm": 0.08272910798076559, "learning_rate": 0.0001978072289601928, "loss": 0.8533, "step": 24430 }, { "epoch": 0.42908056672343264, "grad_norm": 0.07973105340418299, "learning_rate": 0.00019780489594925838, "loss": 0.8546, "step": 24440 }, { "epoch": 0.42925613160343407, "grad_norm": 0.05737166704713955, "learning_rate": 0.0001978025617117874, "loss": 0.853, "step": 24450 }, { "epoch": 0.42943169648343543, "grad_norm": 0.08757422527546417, "learning_rate": 0.00019780022624780937, "loss": 0.863, "step": 24460 }, { "epoch": 0.42960726136343685, "grad_norm": 0.06569540755359904, "learning_rate": 0.00019779788955735396, "loss": 0.8547, "step": 24470 }, { "epoch": 0.4297828262434383, "grad_norm": 0.09818175352279988, "learning_rate": 0.00019779555164045073, "loss": 0.8554, "step": 24480 }, { "epoch": 0.42995839112343964, "grad_norm": 0.066972859743233, "learning_rate": 0.00019779321249712925, "loss": 0.849, "step": 24490 }, { "epoch": 0.43013395600344106, "grad_norm": 0.06215760135249169, "learning_rate": 0.0001977908721274193, "loss": 0.862, "step": 24500 }, { "epoch": 0.4303095208834425, "grad_norm": 0.06780205736970095, "learning_rate": 0.0001977885305313504, "loss": 0.8589, "step": 24510 }, { "epoch": 0.43048508576344385, "grad_norm": 0.07969857247895247, "learning_rate": 0.00019778618770895226, "loss": 0.8557, "step": 24520 }, { "epoch": 0.4306606506434453, "grad_norm": 0.05729831808063077, "learning_rate": 0.00019778384366025463, "loss": 0.8659, "step": 24530 }, { "epoch": 0.4308362155234467, "grad_norm": 0.08522407823094233, "learning_rate": 0.00019778149838528707, "loss": 0.8569, "step": 24540 }, { "epoch": 0.4310117804034481, "grad_norm": 0.06291241280985671, "learning_rate": 0.00019777915188407946, "loss": 0.8476, "step": 24550 }, { "epoch": 0.4311873452834495, "grad_norm": 0.064078293755528, "learning_rate": 0.0001977768041566614, "loss": 0.8569, "step": 24560 }, { "epoch": 0.4313629101634509, "grad_norm": 0.07934553902905082, "learning_rate": 0.0001977744552030627, "loss": 0.8614, "step": 24570 }, { "epoch": 0.43153847504345233, "grad_norm": 0.06585583672621256, "learning_rate": 0.0001977721050233131, "loss": 0.8577, "step": 24580 }, { "epoch": 0.4317140399234537, "grad_norm": 0.08440985147938669, "learning_rate": 0.00019776975361744241, "loss": 0.8607, "step": 24590 }, { "epoch": 0.4318896048034551, "grad_norm": 0.07097727632745177, "learning_rate": 0.00019776740098548037, "loss": 0.8579, "step": 24600 }, { "epoch": 0.43206516968345654, "grad_norm": 0.05153926768134646, "learning_rate": 0.00019776504712745683, "loss": 0.8558, "step": 24610 }, { "epoch": 0.4322407345634579, "grad_norm": 0.06209039092930526, "learning_rate": 0.0001977626920434016, "loss": 0.8637, "step": 24620 }, { "epoch": 0.43241629944345933, "grad_norm": 0.066468961530916, "learning_rate": 0.00019776033573334448, "loss": 0.8539, "step": 24630 }, { "epoch": 0.43259186432346075, "grad_norm": 0.07869399171339564, "learning_rate": 0.0001977579781973154, "loss": 0.8691, "step": 24640 }, { "epoch": 0.4327674292034621, "grad_norm": 0.06666957214204834, "learning_rate": 0.00019775561943534417, "loss": 0.8532, "step": 24650 }, { "epoch": 0.43294299408346354, "grad_norm": 0.05606149810756, "learning_rate": 0.0001977532594474607, "loss": 0.8595, "step": 24660 }, { "epoch": 0.43311855896346496, "grad_norm": 0.06433590181686363, "learning_rate": 0.00019775089823369487, "loss": 0.8584, "step": 24670 }, { "epoch": 0.43329412384346633, "grad_norm": 0.06980280422862005, "learning_rate": 0.00019774853579407667, "loss": 0.8553, "step": 24680 }, { "epoch": 0.43346968872346775, "grad_norm": 0.07591350929000831, "learning_rate": 0.00019774617212863594, "loss": 0.8562, "step": 24690 }, { "epoch": 0.43364525360346917, "grad_norm": 0.07207398510280955, "learning_rate": 0.0001977438072374027, "loss": 0.851, "step": 24700 }, { "epoch": 0.4338208184834706, "grad_norm": 0.07377362253681613, "learning_rate": 0.00019774144112040686, "loss": 0.8557, "step": 24710 }, { "epoch": 0.43399638336347196, "grad_norm": 0.07853337544157342, "learning_rate": 0.0001977390737776784, "loss": 0.857, "step": 24720 }, { "epoch": 0.4341719482434734, "grad_norm": 0.06417901015734073, "learning_rate": 0.0001977367052092474, "loss": 0.8487, "step": 24730 }, { "epoch": 0.4343475131234748, "grad_norm": 0.08066824091943287, "learning_rate": 0.00019773433541514374, "loss": 0.8554, "step": 24740 }, { "epoch": 0.43452307800347617, "grad_norm": 0.07794390508993439, "learning_rate": 0.00019773196439539756, "loss": 0.8526, "step": 24750 }, { "epoch": 0.4346986428834776, "grad_norm": 0.06903696165643244, "learning_rate": 0.0001977295921500388, "loss": 0.8532, "step": 24760 }, { "epoch": 0.434874207763479, "grad_norm": 0.05875076967552137, "learning_rate": 0.00019772721867909762, "loss": 0.8551, "step": 24770 }, { "epoch": 0.4350497726434804, "grad_norm": 0.04352285108165102, "learning_rate": 0.00019772484398260403, "loss": 0.8601, "step": 24780 }, { "epoch": 0.4352253375234818, "grad_norm": 0.058180277393845424, "learning_rate": 0.0001977224680605881, "loss": 0.8563, "step": 24790 }, { "epoch": 0.4354009024034832, "grad_norm": 0.07881379105896263, "learning_rate": 0.00019772009091307999, "loss": 0.8545, "step": 24800 }, { "epoch": 0.4355764672834846, "grad_norm": 0.0724083084779048, "learning_rate": 0.0001977177125401098, "loss": 0.8519, "step": 24810 }, { "epoch": 0.435752032163486, "grad_norm": 0.0605052291363654, "learning_rate": 0.0001977153329417076, "loss": 0.862, "step": 24820 }, { "epoch": 0.43592759704348744, "grad_norm": 0.0749075434916469, "learning_rate": 0.00019771295211790365, "loss": 0.856, "step": 24830 }, { "epoch": 0.4361031619234888, "grad_norm": 0.0738487440995315, "learning_rate": 0.00019771057006872804, "loss": 0.8589, "step": 24840 }, { "epoch": 0.4362787268034902, "grad_norm": 0.062425040392400495, "learning_rate": 0.000197708186794211, "loss": 0.8549, "step": 24850 }, { "epoch": 0.43645429168349165, "grad_norm": 0.08140037484256551, "learning_rate": 0.00019770580229438268, "loss": 0.8507, "step": 24860 }, { "epoch": 0.436629856563493, "grad_norm": 0.057053769269239205, "learning_rate": 0.0001977034165692733, "loss": 0.8623, "step": 24870 }, { "epoch": 0.43680542144349443, "grad_norm": 0.08048341813481333, "learning_rate": 0.0001977010296189131, "loss": 0.8595, "step": 24880 }, { "epoch": 0.43698098632349586, "grad_norm": 0.08047620808888586, "learning_rate": 0.00019769864144333235, "loss": 0.8588, "step": 24890 }, { "epoch": 0.4371565512034973, "grad_norm": 0.07657836732185166, "learning_rate": 0.00019769625204256125, "loss": 0.8659, "step": 24900 }, { "epoch": 0.43733211608349865, "grad_norm": 0.06542083584403642, "learning_rate": 0.00019769386141663014, "loss": 0.8507, "step": 24910 }, { "epoch": 0.43750768096350007, "grad_norm": 0.07001080707681787, "learning_rate": 0.00019769146956556924, "loss": 0.86, "step": 24920 }, { "epoch": 0.4376832458435015, "grad_norm": 0.06424802703006366, "learning_rate": 0.0001976890764894089, "loss": 0.8542, "step": 24930 }, { "epoch": 0.43785881072350286, "grad_norm": 0.06748385489499933, "learning_rate": 0.00019768668218817943, "loss": 0.8548, "step": 24940 }, { "epoch": 0.4380343756035043, "grad_norm": 0.06929843485827702, "learning_rate": 0.00019768428666191117, "loss": 0.8536, "step": 24950 }, { "epoch": 0.4382099404835057, "grad_norm": 0.0467498812454557, "learning_rate": 0.00019768188991063447, "loss": 0.8525, "step": 24960 }, { "epoch": 0.43838550536350707, "grad_norm": 0.06193147680844693, "learning_rate": 0.00019767949193437968, "loss": 0.8467, "step": 24970 }, { "epoch": 0.4385610702435085, "grad_norm": 0.05893836999071017, "learning_rate": 0.00019767709273317726, "loss": 0.849, "step": 24980 }, { "epoch": 0.4387366351235099, "grad_norm": 0.08345229110028242, "learning_rate": 0.00019767469230705747, "loss": 0.8597, "step": 24990 }, { "epoch": 0.4389122000035113, "grad_norm": 0.0712881624204767, "learning_rate": 0.00019767229065605085, "loss": 0.8591, "step": 25000 }, { "epoch": 0.4390877648835127, "grad_norm": 0.06603382551614456, "learning_rate": 0.00019766988778018772, "loss": 0.8564, "step": 25010 }, { "epoch": 0.4392633297635141, "grad_norm": 0.0842370316671665, "learning_rate": 0.00019766748367949866, "loss": 0.8519, "step": 25020 }, { "epoch": 0.4394388946435155, "grad_norm": 0.06305657141043847, "learning_rate": 0.000197665078354014, "loss": 0.8519, "step": 25030 }, { "epoch": 0.4396144595235169, "grad_norm": 0.0704706897482048, "learning_rate": 0.0001976626718037643, "loss": 0.8558, "step": 25040 }, { "epoch": 0.43979002440351833, "grad_norm": 0.07219280069631484, "learning_rate": 0.00019766026402878001, "loss": 0.8476, "step": 25050 }, { "epoch": 0.4399655892835197, "grad_norm": 0.060793173723770286, "learning_rate": 0.00019765785502909165, "loss": 0.8554, "step": 25060 }, { "epoch": 0.4401411541635211, "grad_norm": 0.0640619126753617, "learning_rate": 0.00019765544480472976, "loss": 0.855, "step": 25070 }, { "epoch": 0.44031671904352254, "grad_norm": 0.07624633153976491, "learning_rate": 0.00019765303335572484, "loss": 0.8611, "step": 25080 }, { "epoch": 0.44049228392352396, "grad_norm": 0.07693357677421128, "learning_rate": 0.00019765062068210747, "loss": 0.8544, "step": 25090 }, { "epoch": 0.44066784880352533, "grad_norm": 0.06625169153846602, "learning_rate": 0.00019764820678390822, "loss": 0.8585, "step": 25100 }, { "epoch": 0.44084341368352675, "grad_norm": 0.067079881020378, "learning_rate": 0.00019764579166115767, "loss": 0.8582, "step": 25110 }, { "epoch": 0.4410189785635282, "grad_norm": 0.06620556557505951, "learning_rate": 0.0001976433753138864, "loss": 0.8635, "step": 25120 }, { "epoch": 0.44119454344352954, "grad_norm": 0.058579842998731535, "learning_rate": 0.00019764095774212507, "loss": 0.8527, "step": 25130 }, { "epoch": 0.44137010832353096, "grad_norm": 0.05750106598772731, "learning_rate": 0.00019763853894590424, "loss": 0.8583, "step": 25140 }, { "epoch": 0.4415456732035324, "grad_norm": 0.1491795576181004, "learning_rate": 0.00019763611892525466, "loss": 0.8576, "step": 25150 }, { "epoch": 0.44172123808353375, "grad_norm": 0.08800184052776842, "learning_rate": 0.00019763369768020688, "loss": 0.8594, "step": 25160 }, { "epoch": 0.4418968029635352, "grad_norm": 0.13714712337089133, "learning_rate": 0.00019763127521079163, "loss": 0.8619, "step": 25170 }, { "epoch": 0.4420723678435366, "grad_norm": 0.06827995960148658, "learning_rate": 0.00019762885151703964, "loss": 0.8592, "step": 25180 }, { "epoch": 0.44224793272353796, "grad_norm": 0.08026320836140254, "learning_rate": 0.0001976264265989816, "loss": 0.8558, "step": 25190 }, { "epoch": 0.4424234976035394, "grad_norm": 0.06525160807289564, "learning_rate": 0.00019762400045664818, "loss": 0.8488, "step": 25200 }, { "epoch": 0.4425990624835408, "grad_norm": 0.09080795514481625, "learning_rate": 0.00019762157309007015, "loss": 0.8561, "step": 25210 }, { "epoch": 0.4427746273635422, "grad_norm": 0.09034003716921575, "learning_rate": 0.00019761914449927827, "loss": 0.8529, "step": 25220 }, { "epoch": 0.4429501922435436, "grad_norm": 0.058979142958439014, "learning_rate": 0.00019761671468430334, "loss": 0.8559, "step": 25230 }, { "epoch": 0.443125757123545, "grad_norm": 0.07504305655782251, "learning_rate": 0.0001976142836451761, "loss": 0.8559, "step": 25240 }, { "epoch": 0.4433013220035464, "grad_norm": 0.06808968071728645, "learning_rate": 0.00019761185138192738, "loss": 0.8612, "step": 25250 }, { "epoch": 0.4434768868835478, "grad_norm": 0.056719983063948945, "learning_rate": 0.00019760941789458798, "loss": 0.8468, "step": 25260 }, { "epoch": 0.4436524517635492, "grad_norm": 0.06982229476127343, "learning_rate": 0.00019760698318318874, "loss": 0.8533, "step": 25270 }, { "epoch": 0.44382801664355065, "grad_norm": 0.06469580772177011, "learning_rate": 0.00019760454724776052, "loss": 0.8538, "step": 25280 }, { "epoch": 0.444003581523552, "grad_norm": 0.06502776524755217, "learning_rate": 0.00019760211008833413, "loss": 0.8442, "step": 25290 }, { "epoch": 0.44417914640355344, "grad_norm": 0.059322857751290876, "learning_rate": 0.00019759967170494055, "loss": 0.8598, "step": 25300 }, { "epoch": 0.44435471128355486, "grad_norm": 0.0697747681813306, "learning_rate": 0.0001975972320976106, "loss": 0.8594, "step": 25310 }, { "epoch": 0.4445302761635562, "grad_norm": 0.05632276779391216, "learning_rate": 0.00019759479126637517, "loss": 0.8613, "step": 25320 }, { "epoch": 0.44470584104355765, "grad_norm": 0.06558599804187533, "learning_rate": 0.00019759234921126523, "loss": 0.8553, "step": 25330 }, { "epoch": 0.44488140592355907, "grad_norm": 0.06306544317595814, "learning_rate": 0.00019758990593231175, "loss": 0.8582, "step": 25340 }, { "epoch": 0.44505697080356044, "grad_norm": 0.060705804720247336, "learning_rate": 0.00019758746142954563, "loss": 0.8505, "step": 25350 }, { "epoch": 0.44523253568356186, "grad_norm": 0.0696494391347431, "learning_rate": 0.0001975850157029979, "loss": 0.8589, "step": 25360 }, { "epoch": 0.4454081005635633, "grad_norm": 0.06679106668523975, "learning_rate": 0.00019758256875269944, "loss": 0.8581, "step": 25370 }, { "epoch": 0.44558366544356465, "grad_norm": 0.06706216017145482, "learning_rate": 0.0001975801205786814, "loss": 0.8585, "step": 25380 }, { "epoch": 0.44575923032356607, "grad_norm": 0.055244073934571036, "learning_rate": 0.00019757767118097465, "loss": 0.8458, "step": 25390 }, { "epoch": 0.4459347952035675, "grad_norm": 0.0800053013612004, "learning_rate": 0.00019757522055961033, "loss": 0.853, "step": 25400 }, { "epoch": 0.44611036008356886, "grad_norm": 0.062089781646429115, "learning_rate": 0.00019757276871461944, "loss": 0.8544, "step": 25410 }, { "epoch": 0.4462859249635703, "grad_norm": 0.061152738431304275, "learning_rate": 0.0001975703156460331, "loss": 0.865, "step": 25420 }, { "epoch": 0.4464614898435717, "grad_norm": 0.0606194377806782, "learning_rate": 0.00019756786135388232, "loss": 0.8521, "step": 25430 }, { "epoch": 0.4466370547235731, "grad_norm": 0.06931937526935042, "learning_rate": 0.00019756540583819825, "loss": 0.8621, "step": 25440 }, { "epoch": 0.4468126196035745, "grad_norm": 0.06297235433212978, "learning_rate": 0.00019756294909901198, "loss": 0.8586, "step": 25450 }, { "epoch": 0.4469881844835759, "grad_norm": 0.06696533071229899, "learning_rate": 0.00019756049113635462, "loss": 0.8474, "step": 25460 }, { "epoch": 0.44716374936357733, "grad_norm": 0.06908007197884486, "learning_rate": 0.00019755803195025735, "loss": 0.8524, "step": 25470 }, { "epoch": 0.4473393142435787, "grad_norm": 0.06397453880434888, "learning_rate": 0.00019755557154075128, "loss": 0.8565, "step": 25480 }, { "epoch": 0.4475148791235801, "grad_norm": 0.056189337422617905, "learning_rate": 0.00019755310990786766, "loss": 0.8593, "step": 25490 }, { "epoch": 0.44769044400358154, "grad_norm": 0.06394630411033435, "learning_rate": 0.00019755064705163762, "loss": 0.8486, "step": 25500 }, { "epoch": 0.4478660088835829, "grad_norm": 0.0791930741062487, "learning_rate": 0.00019754818297209234, "loss": 0.8651, "step": 25510 }, { "epoch": 0.44804157376358433, "grad_norm": 0.080293269794989, "learning_rate": 0.00019754571766926314, "loss": 0.8591, "step": 25520 }, { "epoch": 0.44821713864358576, "grad_norm": 0.07119814004271376, "learning_rate": 0.00019754325114318115, "loss": 0.851, "step": 25530 }, { "epoch": 0.4483927035235871, "grad_norm": 0.0647928172840431, "learning_rate": 0.00019754078339387764, "loss": 0.8578, "step": 25540 }, { "epoch": 0.44856826840358854, "grad_norm": 0.06020072054287736, "learning_rate": 0.00019753831442138395, "loss": 0.8562, "step": 25550 }, { "epoch": 0.44874383328358997, "grad_norm": 0.057633668422608676, "learning_rate": 0.00019753584422573133, "loss": 0.8602, "step": 25560 }, { "epoch": 0.44891939816359133, "grad_norm": 0.08223096601723473, "learning_rate": 0.00019753337280695103, "loss": 0.8649, "step": 25570 }, { "epoch": 0.44909496304359275, "grad_norm": 0.08187224625018363, "learning_rate": 0.0001975309001650744, "loss": 0.8622, "step": 25580 }, { "epoch": 0.4492705279235942, "grad_norm": 0.06923147383890592, "learning_rate": 0.00019752842630013276, "loss": 0.8584, "step": 25590 }, { "epoch": 0.44944609280359554, "grad_norm": 0.0667927222782772, "learning_rate": 0.00019752595121215748, "loss": 0.8546, "step": 25600 }, { "epoch": 0.44962165768359696, "grad_norm": 0.12862016482726973, "learning_rate": 0.00019752347490117986, "loss": 0.8578, "step": 25610 }, { "epoch": 0.4497972225635984, "grad_norm": 0.06599878704117441, "learning_rate": 0.00019752099736723136, "loss": 0.8602, "step": 25620 }, { "epoch": 0.4499727874435998, "grad_norm": 0.06941542330152119, "learning_rate": 0.0001975185186103433, "loss": 0.8492, "step": 25630 }, { "epoch": 0.4501483523236012, "grad_norm": 0.07208484446943192, "learning_rate": 0.0001975160386305471, "loss": 0.8578, "step": 25640 }, { "epoch": 0.4503239172036026, "grad_norm": 0.06892861296834842, "learning_rate": 0.0001975135574278742, "loss": 0.8615, "step": 25650 }, { "epoch": 0.450499482083604, "grad_norm": 0.07325382632386332, "learning_rate": 0.0001975110750023561, "loss": 0.8582, "step": 25660 }, { "epoch": 0.4506750469636054, "grad_norm": 0.07046486183242773, "learning_rate": 0.00019750859135402412, "loss": 0.8603, "step": 25670 }, { "epoch": 0.4508506118436068, "grad_norm": 0.07717062669980136, "learning_rate": 0.00019750610648290977, "loss": 0.8605, "step": 25680 }, { "epoch": 0.45102617672360823, "grad_norm": 0.06852262225060798, "learning_rate": 0.00019750362038904458, "loss": 0.8555, "step": 25690 }, { "epoch": 0.4512017416036096, "grad_norm": 0.06855582232572903, "learning_rate": 0.00019750113307246, "loss": 0.8485, "step": 25700 }, { "epoch": 0.451377306483611, "grad_norm": 0.05936174089489748, "learning_rate": 0.00019749864453318762, "loss": 0.861, "step": 25710 }, { "epoch": 0.45155287136361244, "grad_norm": 0.05475049004134085, "learning_rate": 0.0001974961547712589, "loss": 0.8525, "step": 25720 }, { "epoch": 0.4517284362436138, "grad_norm": 0.06623534532309994, "learning_rate": 0.0001974936637867054, "loss": 0.8579, "step": 25730 }, { "epoch": 0.45190400112361523, "grad_norm": 0.06592493272481496, "learning_rate": 0.00019749117157955866, "loss": 0.8658, "step": 25740 }, { "epoch": 0.45207956600361665, "grad_norm": 0.0743160955462744, "learning_rate": 0.00019748867814985033, "loss": 0.861, "step": 25750 }, { "epoch": 0.452255130883618, "grad_norm": 0.06241954908832853, "learning_rate": 0.00019748618349761193, "loss": 0.8637, "step": 25760 }, { "epoch": 0.45243069576361944, "grad_norm": 0.10865288492804304, "learning_rate": 0.0001974836876228751, "loss": 0.8519, "step": 25770 }, { "epoch": 0.45260626064362086, "grad_norm": 0.06881262795555637, "learning_rate": 0.0001974811905256715, "loss": 0.8538, "step": 25780 }, { "epoch": 0.45278182552362223, "grad_norm": 0.06536248621961203, "learning_rate": 0.00019747869220603266, "loss": 0.8476, "step": 25790 }, { "epoch": 0.45295739040362365, "grad_norm": 0.06724865553377461, "learning_rate": 0.00019747619266399032, "loss": 0.8538, "step": 25800 }, { "epoch": 0.45313295528362507, "grad_norm": 0.07225148646015678, "learning_rate": 0.00019747369189957614, "loss": 0.8637, "step": 25810 }, { "epoch": 0.4533085201636265, "grad_norm": 0.08821475884262162, "learning_rate": 0.00019747118991282178, "loss": 0.8535, "step": 25820 }, { "epoch": 0.45348408504362786, "grad_norm": 0.050823989860526976, "learning_rate": 0.00019746868670375895, "loss": 0.8483, "step": 25830 }, { "epoch": 0.4536596499236293, "grad_norm": 0.06272377424180864, "learning_rate": 0.0001974661822724194, "loss": 0.8536, "step": 25840 }, { "epoch": 0.4538352148036307, "grad_norm": 0.060015026135441164, "learning_rate": 0.00019746367661883482, "loss": 0.8616, "step": 25850 }, { "epoch": 0.45401077968363207, "grad_norm": 0.06258378552554607, "learning_rate": 0.00019746116974303697, "loss": 0.8581, "step": 25860 }, { "epoch": 0.4541863445636335, "grad_norm": 0.08861856660405056, "learning_rate": 0.0001974586616450576, "loss": 0.858, "step": 25870 }, { "epoch": 0.4543619094436349, "grad_norm": 0.06511130204204882, "learning_rate": 0.0001974561523249285, "loss": 0.8456, "step": 25880 }, { "epoch": 0.4545374743236363, "grad_norm": 0.057123727995082245, "learning_rate": 0.00019745364178268146, "loss": 0.8523, "step": 25890 }, { "epoch": 0.4547130392036377, "grad_norm": 0.07292345221291806, "learning_rate": 0.00019745113001834835, "loss": 0.8552, "step": 25900 }, { "epoch": 0.4548886040836391, "grad_norm": 0.06204831190602485, "learning_rate": 0.00019744861703196087, "loss": 0.8614, "step": 25910 }, { "epoch": 0.4550641689636405, "grad_norm": 0.06643268127765942, "learning_rate": 0.00019744610282355096, "loss": 0.8556, "step": 25920 }, { "epoch": 0.4552397338436419, "grad_norm": 0.09204164757507034, "learning_rate": 0.00019744358739315045, "loss": 0.8629, "step": 25930 }, { "epoch": 0.45541529872364334, "grad_norm": 0.06915176108425833, "learning_rate": 0.00019744107074079118, "loss": 0.8616, "step": 25940 }, { "epoch": 0.4555908636036447, "grad_norm": 0.05846330624786993, "learning_rate": 0.00019743855286650508, "loss": 0.8567, "step": 25950 }, { "epoch": 0.4557664284836461, "grad_norm": 0.0709093899290539, "learning_rate": 0.00019743603377032402, "loss": 0.847, "step": 25960 }, { "epoch": 0.45594199336364755, "grad_norm": 0.07469314654917064, "learning_rate": 0.00019743351345227994, "loss": 0.8459, "step": 25970 }, { "epoch": 0.4561175582436489, "grad_norm": 0.059695903654907644, "learning_rate": 0.00019743099191240474, "loss": 0.8515, "step": 25980 }, { "epoch": 0.45629312312365033, "grad_norm": 0.06198680149354743, "learning_rate": 0.0001974284691507304, "loss": 0.8537, "step": 25990 }, { "epoch": 0.45646868800365176, "grad_norm": 0.0791742065237304, "learning_rate": 0.0001974259451672889, "loss": 0.8585, "step": 26000 }, { "epoch": 0.4566442528836532, "grad_norm": 0.05781224818517847, "learning_rate": 0.0001974234199621122, "loss": 0.8505, "step": 26010 }, { "epoch": 0.45681981776365455, "grad_norm": 0.09219073236747584, "learning_rate": 0.00019742089353523226, "loss": 0.8541, "step": 26020 }, { "epoch": 0.45699538264365597, "grad_norm": 0.06185860386241468, "learning_rate": 0.00019741836588668113, "loss": 0.8562, "step": 26030 }, { "epoch": 0.4571709475236574, "grad_norm": 0.06329335041108722, "learning_rate": 0.0001974158370164908, "loss": 0.8528, "step": 26040 }, { "epoch": 0.45734651240365876, "grad_norm": 0.07781871297151056, "learning_rate": 0.00019741330692469339, "loss": 0.8512, "step": 26050 }, { "epoch": 0.4575220772836602, "grad_norm": 0.06439762780958869, "learning_rate": 0.00019741077561132085, "loss": 0.8655, "step": 26060 }, { "epoch": 0.4576976421636616, "grad_norm": 0.057798955374480515, "learning_rate": 0.00019740824307640535, "loss": 0.8631, "step": 26070 }, { "epoch": 0.45787320704366297, "grad_norm": 0.07058121455304425, "learning_rate": 0.00019740570931997892, "loss": 0.849, "step": 26080 }, { "epoch": 0.4580487719236644, "grad_norm": 0.05803130591280722, "learning_rate": 0.00019740317434207368, "loss": 0.8623, "step": 26090 }, { "epoch": 0.4582243368036658, "grad_norm": 0.10659324908157354, "learning_rate": 0.00019740063814272172, "loss": 0.8393, "step": 26100 }, { "epoch": 0.4583999016836672, "grad_norm": 0.04964848965698683, "learning_rate": 0.00019739810072195521, "loss": 0.8481, "step": 26110 }, { "epoch": 0.4585754665636686, "grad_norm": 0.05937466818366005, "learning_rate": 0.0001973955620798063, "loss": 0.8556, "step": 26120 }, { "epoch": 0.45875103144367, "grad_norm": 0.08309738465008282, "learning_rate": 0.00019739302221630716, "loss": 0.8603, "step": 26130 }, { "epoch": 0.4589265963236714, "grad_norm": 0.07569544799658894, "learning_rate": 0.00019739048113148993, "loss": 0.8556, "step": 26140 }, { "epoch": 0.4591021612036728, "grad_norm": 0.0581931298140712, "learning_rate": 0.00019738793882538683, "loss": 0.8597, "step": 26150 }, { "epoch": 0.45927772608367423, "grad_norm": 0.06024352766630603, "learning_rate": 0.00019738539529803008, "loss": 0.8528, "step": 26160 }, { "epoch": 0.45945329096367565, "grad_norm": 0.09786796744735765, "learning_rate": 0.0001973828505494519, "loss": 0.8504, "step": 26170 }, { "epoch": 0.459628855843677, "grad_norm": 0.07645646939371104, "learning_rate": 0.00019738030457968454, "loss": 0.8581, "step": 26180 }, { "epoch": 0.45980442072367844, "grad_norm": 0.07815478775210909, "learning_rate": 0.00019737775738876023, "loss": 0.862, "step": 26190 }, { "epoch": 0.45997998560367986, "grad_norm": 0.07306504737766241, "learning_rate": 0.00019737520897671127, "loss": 0.856, "step": 26200 }, { "epoch": 0.46015555048368123, "grad_norm": 0.06297350356356136, "learning_rate": 0.00019737265934356994, "loss": 0.8588, "step": 26210 }, { "epoch": 0.46033111536368265, "grad_norm": 0.06785527930383016, "learning_rate": 0.00019737010848936857, "loss": 0.8607, "step": 26220 }, { "epoch": 0.4605066802436841, "grad_norm": 0.09250933185283432, "learning_rate": 0.00019736755641413942, "loss": 0.8523, "step": 26230 }, { "epoch": 0.46068224512368544, "grad_norm": 0.06397657965565698, "learning_rate": 0.00019736500311791488, "loss": 0.8567, "step": 26240 }, { "epoch": 0.46085781000368686, "grad_norm": 0.06191457613190606, "learning_rate": 0.00019736244860072728, "loss": 0.8575, "step": 26250 }, { "epoch": 0.4610333748836883, "grad_norm": 0.05887282523356087, "learning_rate": 0.00019735989286260902, "loss": 0.8456, "step": 26260 }, { "epoch": 0.46120893976368965, "grad_norm": 0.06348652134994523, "learning_rate": 0.0001973573359035924, "loss": 0.8532, "step": 26270 }, { "epoch": 0.4613845046436911, "grad_norm": 0.06438693255766571, "learning_rate": 0.0001973547777237099, "loss": 0.8592, "step": 26280 }, { "epoch": 0.4615600695236925, "grad_norm": 0.07480862042205318, "learning_rate": 0.00019735221832299388, "loss": 0.8536, "step": 26290 }, { "epoch": 0.46173563440369386, "grad_norm": 0.0615523423254598, "learning_rate": 0.0001973496577014768, "loss": 0.8531, "step": 26300 }, { "epoch": 0.4619111992836953, "grad_norm": 0.06752819426740302, "learning_rate": 0.0001973470958591911, "loss": 0.8525, "step": 26310 }, { "epoch": 0.4620867641636967, "grad_norm": 0.06567431646929696, "learning_rate": 0.0001973445327961692, "loss": 0.8634, "step": 26320 }, { "epoch": 0.4622623290436981, "grad_norm": 0.05264530011156955, "learning_rate": 0.0001973419685124436, "loss": 0.8678, "step": 26330 }, { "epoch": 0.4624378939236995, "grad_norm": 0.06406744085634797, "learning_rate": 0.00019733940300804682, "loss": 0.8518, "step": 26340 }, { "epoch": 0.4626134588037009, "grad_norm": 0.05975998140019014, "learning_rate": 0.00019733683628301137, "loss": 0.8536, "step": 26350 }, { "epoch": 0.46278902368370234, "grad_norm": 0.06342105122601151, "learning_rate": 0.0001973342683373697, "loss": 0.852, "step": 26360 }, { "epoch": 0.4629645885637037, "grad_norm": 0.06292407016298075, "learning_rate": 0.0001973316991711544, "loss": 0.8506, "step": 26370 }, { "epoch": 0.4631401534437051, "grad_norm": 0.06554698926571184, "learning_rate": 0.00019732912878439802, "loss": 0.8485, "step": 26380 }, { "epoch": 0.46331571832370655, "grad_norm": 0.06864888219883872, "learning_rate": 0.0001973265571771331, "loss": 0.8635, "step": 26390 }, { "epoch": 0.4634912832037079, "grad_norm": 0.060651055027513584, "learning_rate": 0.00019732398434939223, "loss": 0.8538, "step": 26400 }, { "epoch": 0.46366684808370934, "grad_norm": 0.10985393004152066, "learning_rate": 0.00019732141030120802, "loss": 0.8566, "step": 26410 }, { "epoch": 0.46384241296371076, "grad_norm": 0.07303041532196781, "learning_rate": 0.00019731883503261313, "loss": 0.8537, "step": 26420 }, { "epoch": 0.4640179778437121, "grad_norm": 0.06324763680670217, "learning_rate": 0.0001973162585436401, "loss": 0.8513, "step": 26430 }, { "epoch": 0.46419354272371355, "grad_norm": 0.058332092625942314, "learning_rate": 0.0001973136808343216, "loss": 0.8558, "step": 26440 }, { "epoch": 0.46436910760371497, "grad_norm": 0.060225211905783914, "learning_rate": 0.00019731110190469032, "loss": 0.8534, "step": 26450 }, { "epoch": 0.46454467248371634, "grad_norm": 0.06090528901855355, "learning_rate": 0.00019730852175477894, "loss": 0.8496, "step": 26460 }, { "epoch": 0.46472023736371776, "grad_norm": 0.06304923506048156, "learning_rate": 0.0001973059403846201, "loss": 0.8663, "step": 26470 }, { "epoch": 0.4648958022437192, "grad_norm": 0.07332038216050225, "learning_rate": 0.0001973033577942466, "loss": 0.8594, "step": 26480 }, { "epoch": 0.46507136712372055, "grad_norm": 0.07054149587920959, "learning_rate": 0.00019730077398369102, "loss": 0.8431, "step": 26490 }, { "epoch": 0.46524693200372197, "grad_norm": 0.07373895429775396, "learning_rate": 0.00019729818895298625, "loss": 0.8494, "step": 26500 }, { "epoch": 0.4654224968837234, "grad_norm": 0.06070831681420259, "learning_rate": 0.0001972956027021649, "loss": 0.8566, "step": 26510 }, { "epoch": 0.46559806176372476, "grad_norm": 0.05727110036337638, "learning_rate": 0.00019729301523125985, "loss": 0.8478, "step": 26520 }, { "epoch": 0.4657736266437262, "grad_norm": 0.05911256446091189, "learning_rate": 0.00019729042654030384, "loss": 0.8527, "step": 26530 }, { "epoch": 0.4659491915237276, "grad_norm": 0.06314722630865474, "learning_rate": 0.00019728783662932965, "loss": 0.8509, "step": 26540 }, { "epoch": 0.466124756403729, "grad_norm": 0.06277261603433518, "learning_rate": 0.00019728524549837015, "loss": 0.8627, "step": 26550 }, { "epoch": 0.4663003212837304, "grad_norm": 0.07187657135366095, "learning_rate": 0.00019728265314745811, "loss": 0.8576, "step": 26560 }, { "epoch": 0.4664758861637318, "grad_norm": 0.061149791830974735, "learning_rate": 0.0001972800595766264, "loss": 0.8602, "step": 26570 }, { "epoch": 0.46665145104373323, "grad_norm": 0.04997806297382646, "learning_rate": 0.0001972774647859079, "loss": 0.8608, "step": 26580 }, { "epoch": 0.4668270159237346, "grad_norm": 0.08171766670386364, "learning_rate": 0.0001972748687753355, "loss": 0.8607, "step": 26590 }, { "epoch": 0.467002580803736, "grad_norm": 0.08464067451262, "learning_rate": 0.000197272271544942, "loss": 0.8611, "step": 26600 }, { "epoch": 0.46717814568373744, "grad_norm": 0.0707141193108053, "learning_rate": 0.0001972696730947604, "loss": 0.8515, "step": 26610 }, { "epoch": 0.4673537105637388, "grad_norm": 0.07434070586382739, "learning_rate": 0.0001972670734248236, "loss": 0.8548, "step": 26620 }, { "epoch": 0.46752927544374023, "grad_norm": 0.09037047372551983, "learning_rate": 0.00019726447253516454, "loss": 0.8531, "step": 26630 }, { "epoch": 0.46770484032374166, "grad_norm": 0.07113035733731866, "learning_rate": 0.00019726187042581612, "loss": 0.8518, "step": 26640 }, { "epoch": 0.467880405203743, "grad_norm": 0.06434064330213746, "learning_rate": 0.00019725926709681136, "loss": 0.8613, "step": 26650 }, { "epoch": 0.46805597008374444, "grad_norm": 0.060887858542256756, "learning_rate": 0.00019725666254818324, "loss": 0.8571, "step": 26660 }, { "epoch": 0.46823153496374587, "grad_norm": 0.058992315120566435, "learning_rate": 0.0001972540567799648, "loss": 0.8517, "step": 26670 }, { "epoch": 0.46840709984374723, "grad_norm": 0.07839284333847493, "learning_rate": 0.000197251449792189, "loss": 0.8549, "step": 26680 }, { "epoch": 0.46858266472374865, "grad_norm": 0.0507488083887886, "learning_rate": 0.00019724884158488882, "loss": 0.8539, "step": 26690 }, { "epoch": 0.4687582296037501, "grad_norm": 0.06119576619958442, "learning_rate": 0.00019724623215809748, "loss": 0.8564, "step": 26700 }, { "epoch": 0.46893379448375144, "grad_norm": 0.06408336786298548, "learning_rate": 0.00019724362151184784, "loss": 0.8579, "step": 26710 }, { "epoch": 0.46910935936375286, "grad_norm": 0.0644146252790462, "learning_rate": 0.00019724100964617312, "loss": 0.8463, "step": 26720 }, { "epoch": 0.4692849242437543, "grad_norm": 0.06743610101294686, "learning_rate": 0.00019723839656110635, "loss": 0.8529, "step": 26730 }, { "epoch": 0.4694604891237557, "grad_norm": 0.06915855696397694, "learning_rate": 0.00019723578225668063, "loss": 0.8577, "step": 26740 }, { "epoch": 0.4696360540037571, "grad_norm": 0.06283346400551916, "learning_rate": 0.0001972331667329291, "loss": 0.8577, "step": 26750 }, { "epoch": 0.4698116188837585, "grad_norm": 0.05420639805557297, "learning_rate": 0.00019723054998988494, "loss": 0.8616, "step": 26760 }, { "epoch": 0.4699871837637599, "grad_norm": 0.07025774787070745, "learning_rate": 0.00019722793202758127, "loss": 0.8581, "step": 26770 }, { "epoch": 0.4701627486437613, "grad_norm": 0.0697601324510459, "learning_rate": 0.00019722531284605127, "loss": 0.8615, "step": 26780 }, { "epoch": 0.4703383135237627, "grad_norm": 0.06383171459732533, "learning_rate": 0.0001972226924453281, "loss": 0.8588, "step": 26790 }, { "epoch": 0.47051387840376413, "grad_norm": 0.10879940764412253, "learning_rate": 0.00019722007082544494, "loss": 0.8551, "step": 26800 }, { "epoch": 0.4706894432837655, "grad_norm": 0.06624317600283815, "learning_rate": 0.00019721744798643508, "loss": 0.8528, "step": 26810 }, { "epoch": 0.4708650081637669, "grad_norm": 0.07607252469210495, "learning_rate": 0.0001972148239283317, "loss": 0.8534, "step": 26820 }, { "epoch": 0.47104057304376834, "grad_norm": 0.059611879922200045, "learning_rate": 0.00019721219865116807, "loss": 0.8538, "step": 26830 }, { "epoch": 0.4712161379237697, "grad_norm": 0.05322570517771284, "learning_rate": 0.00019720957215497745, "loss": 0.8476, "step": 26840 }, { "epoch": 0.47139170280377113, "grad_norm": 0.0654096482193431, "learning_rate": 0.0001972069444397931, "loss": 0.8564, "step": 26850 }, { "epoch": 0.47156726768377255, "grad_norm": 0.055160666191559925, "learning_rate": 0.0001972043155056483, "loss": 0.8605, "step": 26860 }, { "epoch": 0.4717428325637739, "grad_norm": 0.09544192896123628, "learning_rate": 0.00019720168535257643, "loss": 0.8559, "step": 26870 }, { "epoch": 0.47191839744377534, "grad_norm": 0.07500152821653201, "learning_rate": 0.00019719905398061073, "loss": 0.8496, "step": 26880 }, { "epoch": 0.47209396232377676, "grad_norm": 0.0685291226268722, "learning_rate": 0.0001971964213897846, "loss": 0.8539, "step": 26890 }, { "epoch": 0.4722695272037782, "grad_norm": 0.0635625280811844, "learning_rate": 0.0001971937875801314, "loss": 0.8635, "step": 26900 }, { "epoch": 0.47244509208377955, "grad_norm": 0.07064911737222981, "learning_rate": 0.00019719115255168444, "loss": 0.8588, "step": 26910 }, { "epoch": 0.47262065696378097, "grad_norm": 0.06217224542783314, "learning_rate": 0.0001971885163044772, "loss": 0.8565, "step": 26920 }, { "epoch": 0.4727962218437824, "grad_norm": 0.05841006781324115, "learning_rate": 0.00019718587883854293, "loss": 0.8547, "step": 26930 }, { "epoch": 0.47297178672378376, "grad_norm": 0.06904368806585524, "learning_rate": 0.0001971832401539152, "loss": 0.8536, "step": 26940 }, { "epoch": 0.4731473516037852, "grad_norm": 0.06311542859313705, "learning_rate": 0.0001971806002506274, "loss": 0.8592, "step": 26950 }, { "epoch": 0.4733229164837866, "grad_norm": 0.062353835494114176, "learning_rate": 0.00019717795912871293, "loss": 0.8549, "step": 26960 }, { "epoch": 0.47349848136378797, "grad_norm": 0.11503668036406806, "learning_rate": 0.00019717531678820527, "loss": 0.8593, "step": 26970 }, { "epoch": 0.4736740462437894, "grad_norm": 0.08185983466603101, "learning_rate": 0.00019717267322913795, "loss": 0.858, "step": 26980 }, { "epoch": 0.4738496111237908, "grad_norm": 0.06630980104330357, "learning_rate": 0.0001971700284515444, "loss": 0.8648, "step": 26990 }, { "epoch": 0.4740251760037922, "grad_norm": 0.09181406606766367, "learning_rate": 0.0001971673824554582, "loss": 0.8567, "step": 27000 }, { "epoch": 0.4742007408837936, "grad_norm": 0.062316612121533686, "learning_rate": 0.0001971647352409128, "loss": 0.8524, "step": 27010 }, { "epoch": 0.474376305763795, "grad_norm": 0.06353189441594131, "learning_rate": 0.00019716208680794177, "loss": 0.8514, "step": 27020 }, { "epoch": 0.4745518706437964, "grad_norm": 0.0840507419055481, "learning_rate": 0.00019715943715657868, "loss": 0.8596, "step": 27030 }, { "epoch": 0.4747274355237978, "grad_norm": 0.09631547288685952, "learning_rate": 0.0001971567862868571, "loss": 0.8564, "step": 27040 }, { "epoch": 0.47490300040379924, "grad_norm": 0.07430686719049828, "learning_rate": 0.00019715413419881062, "loss": 0.8562, "step": 27050 }, { "epoch": 0.4750785652838006, "grad_norm": 0.10026830517995503, "learning_rate": 0.00019715148089247283, "loss": 0.8588, "step": 27060 }, { "epoch": 0.475254130163802, "grad_norm": 0.07478864479035628, "learning_rate": 0.00019714882636787733, "loss": 0.859, "step": 27070 }, { "epoch": 0.47542969504380345, "grad_norm": 0.04822948606426243, "learning_rate": 0.00019714617062505774, "loss": 0.8623, "step": 27080 }, { "epoch": 0.47560525992380487, "grad_norm": 0.07100643247143651, "learning_rate": 0.00019714351366404779, "loss": 0.8606, "step": 27090 }, { "epoch": 0.47578082480380623, "grad_norm": 0.0881956980849901, "learning_rate": 0.00019714085548488107, "loss": 0.8504, "step": 27100 }, { "epoch": 0.47595638968380766, "grad_norm": 0.07650538858460322, "learning_rate": 0.0001971381960875913, "loss": 0.8587, "step": 27110 }, { "epoch": 0.4761319545638091, "grad_norm": 0.06326272642744339, "learning_rate": 0.0001971355354722122, "loss": 0.8621, "step": 27120 }, { "epoch": 0.47630751944381045, "grad_norm": 0.07650012627697049, "learning_rate": 0.0001971328736387774, "loss": 0.8538, "step": 27130 }, { "epoch": 0.47648308432381187, "grad_norm": 0.06561713987030827, "learning_rate": 0.0001971302105873207, "loss": 0.8626, "step": 27140 }, { "epoch": 0.4766586492038133, "grad_norm": 0.0557605189838291, "learning_rate": 0.00019712754631787574, "loss": 0.856, "step": 27150 }, { "epoch": 0.47683421408381466, "grad_norm": 0.07248665010368488, "learning_rate": 0.0001971248808304764, "loss": 0.8549, "step": 27160 }, { "epoch": 0.4770097789638161, "grad_norm": 0.059109716405381874, "learning_rate": 0.00019712221412515637, "loss": 0.8551, "step": 27170 }, { "epoch": 0.4771853438438175, "grad_norm": 0.07786637259915112, "learning_rate": 0.00019711954620194947, "loss": 0.8521, "step": 27180 }, { "epoch": 0.47736090872381887, "grad_norm": 0.07036313806291657, "learning_rate": 0.00019711687706088952, "loss": 0.8618, "step": 27190 }, { "epoch": 0.4775364736038203, "grad_norm": 0.07562785145597291, "learning_rate": 0.0001971142067020103, "loss": 0.8493, "step": 27200 }, { "epoch": 0.4777120384838217, "grad_norm": 0.06102242095308004, "learning_rate": 0.00019711153512534568, "loss": 0.8611, "step": 27210 }, { "epoch": 0.4778876033638231, "grad_norm": 0.0638127087130811, "learning_rate": 0.00019710886233092947, "loss": 0.8575, "step": 27220 }, { "epoch": 0.4780631682438245, "grad_norm": 0.1099226829128162, "learning_rate": 0.00019710618831879554, "loss": 0.8591, "step": 27230 }, { "epoch": 0.4782387331238259, "grad_norm": 0.08905341714928514, "learning_rate": 0.0001971035130889778, "loss": 0.8594, "step": 27240 }, { "epoch": 0.4784142980038273, "grad_norm": 0.07175823802811242, "learning_rate": 0.00019710083664151016, "loss": 0.8552, "step": 27250 }, { "epoch": 0.4785898628838287, "grad_norm": 0.0739457202478879, "learning_rate": 0.00019709815897642647, "loss": 0.8602, "step": 27260 }, { "epoch": 0.47876542776383013, "grad_norm": 0.06758692472465767, "learning_rate": 0.0001970954800937607, "loss": 0.8492, "step": 27270 }, { "epoch": 0.47894099264383155, "grad_norm": 0.07294576388880668, "learning_rate": 0.00019709279999354678, "loss": 0.8585, "step": 27280 }, { "epoch": 0.4791165575238329, "grad_norm": 0.09820665008744509, "learning_rate": 0.00019709011867581868, "loss": 0.8489, "step": 27290 }, { "epoch": 0.47929212240383434, "grad_norm": 0.08618211523350933, "learning_rate": 0.00019708743614061034, "loss": 0.8518, "step": 27300 }, { "epoch": 0.47946768728383576, "grad_norm": 0.060274246487335, "learning_rate": 0.00019708475238795577, "loss": 0.8534, "step": 27310 }, { "epoch": 0.47964325216383713, "grad_norm": 0.05898601147409525, "learning_rate": 0.00019708206741788896, "loss": 0.8542, "step": 27320 }, { "epoch": 0.47981881704383855, "grad_norm": 0.054838399993351876, "learning_rate": 0.000197079381230444, "loss": 0.8582, "step": 27330 }, { "epoch": 0.47999438192384, "grad_norm": 0.06785639807392535, "learning_rate": 0.0001970766938256548, "loss": 0.854, "step": 27340 }, { "epoch": 0.48016994680384134, "grad_norm": 0.07638838428275395, "learning_rate": 0.0001970740052035555, "loss": 0.859, "step": 27350 }, { "epoch": 0.48034551168384276, "grad_norm": 0.06953102734728361, "learning_rate": 0.00019707131536418016, "loss": 0.8519, "step": 27360 }, { "epoch": 0.4805210765638442, "grad_norm": 0.05665040596576454, "learning_rate": 0.00019706862430756284, "loss": 0.8555, "step": 27370 }, { "epoch": 0.48069664144384555, "grad_norm": 0.07172261542305274, "learning_rate": 0.00019706593203373763, "loss": 0.8589, "step": 27380 }, { "epoch": 0.480872206323847, "grad_norm": 0.08768823651411391, "learning_rate": 0.00019706323854273863, "loss": 0.8563, "step": 27390 }, { "epoch": 0.4810477712038484, "grad_norm": 0.06113865244925923, "learning_rate": 0.0001970605438346, "loss": 0.8537, "step": 27400 }, { "epoch": 0.48122333608384976, "grad_norm": 0.06504890774484924, "learning_rate": 0.00019705784790935587, "loss": 0.8625, "step": 27410 }, { "epoch": 0.4813989009638512, "grad_norm": 0.05688937528245086, "learning_rate": 0.0001970551507670404, "loss": 0.8583, "step": 27420 }, { "epoch": 0.4815744658438526, "grad_norm": 0.07186803308600347, "learning_rate": 0.00019705245240768775, "loss": 0.8496, "step": 27430 }, { "epoch": 0.481750030723854, "grad_norm": 0.08790078781507331, "learning_rate": 0.0001970497528313321, "loss": 0.8472, "step": 27440 }, { "epoch": 0.4819255956038554, "grad_norm": 0.05386567030297408, "learning_rate": 0.00019704705203800775, "loss": 0.8501, "step": 27450 }, { "epoch": 0.4821011604838568, "grad_norm": 0.06268386956392248, "learning_rate": 0.0001970443500277488, "loss": 0.8592, "step": 27460 }, { "epoch": 0.48227672536385824, "grad_norm": 0.05708046906377315, "learning_rate": 0.0001970416468005895, "loss": 0.8659, "step": 27470 }, { "epoch": 0.4824522902438596, "grad_norm": 0.06958606396959711, "learning_rate": 0.0001970389423565642, "loss": 0.8545, "step": 27480 }, { "epoch": 0.482627855123861, "grad_norm": 0.06313474593042838, "learning_rate": 0.00019703623669570702, "loss": 0.8505, "step": 27490 }, { "epoch": 0.48280342000386245, "grad_norm": 0.06427527764303687, "learning_rate": 0.00019703352981805235, "loss": 0.8567, "step": 27500 }, { "epoch": 0.4829789848838638, "grad_norm": 0.06765557713218012, "learning_rate": 0.00019703082172363445, "loss": 0.8584, "step": 27510 }, { "epoch": 0.48315454976386524, "grad_norm": 0.054144149276880514, "learning_rate": 0.00019702811241248763, "loss": 0.8558, "step": 27520 }, { "epoch": 0.48333011464386666, "grad_norm": 0.061267536834216214, "learning_rate": 0.00019702540188464625, "loss": 0.8595, "step": 27530 }, { "epoch": 0.483505679523868, "grad_norm": 0.05504511996244035, "learning_rate": 0.0001970226901401446, "loss": 0.8561, "step": 27540 }, { "epoch": 0.48368124440386945, "grad_norm": 0.07838870881142505, "learning_rate": 0.00019701997717901703, "loss": 0.8612, "step": 27550 }, { "epoch": 0.48385680928387087, "grad_norm": 0.08555503188066257, "learning_rate": 0.00019701726300129797, "loss": 0.8585, "step": 27560 }, { "epoch": 0.48403237416387224, "grad_norm": 0.08683377504321554, "learning_rate": 0.00019701454760702182, "loss": 0.8546, "step": 27570 }, { "epoch": 0.48420793904387366, "grad_norm": 0.07944836326217591, "learning_rate": 0.00019701183099622293, "loss": 0.854, "step": 27580 }, { "epoch": 0.4843835039238751, "grad_norm": 0.0564204983108016, "learning_rate": 0.0001970091131689357, "loss": 0.8519, "step": 27590 }, { "epoch": 0.48455906880387645, "grad_norm": 0.07428200695757439, "learning_rate": 0.00019700639412519464, "loss": 0.8477, "step": 27600 }, { "epoch": 0.48473463368387787, "grad_norm": 0.06540456733060104, "learning_rate": 0.0001970036738650342, "loss": 0.8591, "step": 27610 }, { "epoch": 0.4849101985638793, "grad_norm": 0.08680075211634407, "learning_rate": 0.00019700095238848876, "loss": 0.8515, "step": 27620 }, { "epoch": 0.4850857634438807, "grad_norm": 0.1318464429460853, "learning_rate": 0.00019699822969559288, "loss": 0.8566, "step": 27630 }, { "epoch": 0.4852613283238821, "grad_norm": 0.06630031888808405, "learning_rate": 0.00019699550578638103, "loss": 0.8579, "step": 27640 }, { "epoch": 0.4854368932038835, "grad_norm": 0.07240842569717922, "learning_rate": 0.00019699278066088774, "loss": 0.8572, "step": 27650 }, { "epoch": 0.4856124580838849, "grad_norm": 0.05770079674679219, "learning_rate": 0.0001969900543191475, "loss": 0.86, "step": 27660 }, { "epoch": 0.4857880229638863, "grad_norm": 0.06509623105911508, "learning_rate": 0.0001969873267611949, "loss": 0.8605, "step": 27670 }, { "epoch": 0.4859635878438877, "grad_norm": 0.09301881989420197, "learning_rate": 0.00019698459798706444, "loss": 0.8602, "step": 27680 }, { "epoch": 0.48613915272388913, "grad_norm": 0.0747832824346993, "learning_rate": 0.00019698186799679073, "loss": 0.851, "step": 27690 }, { "epoch": 0.4863147176038905, "grad_norm": 0.06613081410063684, "learning_rate": 0.00019697913679040839, "loss": 0.8589, "step": 27700 }, { "epoch": 0.4864902824838919, "grad_norm": 0.054508370336264315, "learning_rate": 0.00019697640436795198, "loss": 0.8495, "step": 27710 }, { "epoch": 0.48666584736389334, "grad_norm": 0.057978159646401316, "learning_rate": 0.0001969736707294561, "loss": 0.8634, "step": 27720 }, { "epoch": 0.4868414122438947, "grad_norm": 0.07618244382054118, "learning_rate": 0.00019697093587495542, "loss": 0.85, "step": 27730 }, { "epoch": 0.48701697712389613, "grad_norm": 0.08086187047914759, "learning_rate": 0.00019696819980448467, "loss": 0.8646, "step": 27740 }, { "epoch": 0.48719254200389756, "grad_norm": 0.07924945099493652, "learning_rate": 0.00019696546251807837, "loss": 0.8544, "step": 27750 }, { "epoch": 0.4873681068838989, "grad_norm": 0.05977372430135445, "learning_rate": 0.00019696272401577126, "loss": 0.8593, "step": 27760 }, { "epoch": 0.48754367176390034, "grad_norm": 0.05557158049491057, "learning_rate": 0.00019695998429759807, "loss": 0.8533, "step": 27770 }, { "epoch": 0.48771923664390177, "grad_norm": 0.06082241780043993, "learning_rate": 0.00019695724336359348, "loss": 0.8619, "step": 27780 }, { "epoch": 0.48789480152390313, "grad_norm": 0.07461557549725935, "learning_rate": 0.00019695450121379223, "loss": 0.8512, "step": 27790 }, { "epoch": 0.48807036640390455, "grad_norm": 0.0642805376579669, "learning_rate": 0.00019695175784822906, "loss": 0.85, "step": 27800 }, { "epoch": 0.488245931283906, "grad_norm": 0.08563438884587328, "learning_rate": 0.00019694901326693874, "loss": 0.8519, "step": 27810 }, { "epoch": 0.4884214961639074, "grad_norm": 0.04716129367903034, "learning_rate": 0.00019694626746995602, "loss": 0.8589, "step": 27820 }, { "epoch": 0.48859706104390876, "grad_norm": 0.06252324871832242, "learning_rate": 0.0001969435204573157, "loss": 0.8532, "step": 27830 }, { "epoch": 0.4887726259239102, "grad_norm": 0.05710053110990763, "learning_rate": 0.00019694077222905262, "loss": 0.854, "step": 27840 }, { "epoch": 0.4889481908039116, "grad_norm": 0.06776119869554335, "learning_rate": 0.00019693802278520153, "loss": 0.8555, "step": 27850 }, { "epoch": 0.489123755683913, "grad_norm": 0.06046355107025973, "learning_rate": 0.0001969352721257973, "loss": 0.8527, "step": 27860 }, { "epoch": 0.4892993205639144, "grad_norm": 0.07847880543809231, "learning_rate": 0.00019693252025087483, "loss": 0.8545, "step": 27870 }, { "epoch": 0.4894748854439158, "grad_norm": 0.0979912048807461, "learning_rate": 0.00019692976716046892, "loss": 0.8568, "step": 27880 }, { "epoch": 0.4896504503239172, "grad_norm": 0.05352378443811399, "learning_rate": 0.00019692701285461443, "loss": 0.8562, "step": 27890 }, { "epoch": 0.4898260152039186, "grad_norm": 0.05975911268630526, "learning_rate": 0.00019692425733334635, "loss": 0.8582, "step": 27900 }, { "epoch": 0.49000158008392003, "grad_norm": 0.0847764870368552, "learning_rate": 0.00019692150059669953, "loss": 0.8582, "step": 27910 }, { "epoch": 0.4901771449639214, "grad_norm": 0.06975054035546162, "learning_rate": 0.00019691874264470894, "loss": 0.8536, "step": 27920 }, { "epoch": 0.4903527098439228, "grad_norm": 0.07462167687298878, "learning_rate": 0.0001969159834774095, "loss": 0.8587, "step": 27930 }, { "epoch": 0.49052827472392424, "grad_norm": 0.054980067801393334, "learning_rate": 0.00019691322309483612, "loss": 0.8579, "step": 27940 }, { "epoch": 0.4907038396039256, "grad_norm": 0.05476062022442028, "learning_rate": 0.00019691046149702385, "loss": 0.8602, "step": 27950 }, { "epoch": 0.49087940448392703, "grad_norm": 0.06465711858667579, "learning_rate": 0.00019690769868400767, "loss": 0.8584, "step": 27960 }, { "epoch": 0.49105496936392845, "grad_norm": 0.045391642822662545, "learning_rate": 0.00019690493465582257, "loss": 0.8553, "step": 27970 }, { "epoch": 0.4912305342439298, "grad_norm": 0.07291529003120864, "learning_rate": 0.00019690216941250352, "loss": 0.8524, "step": 27980 }, { "epoch": 0.49140609912393124, "grad_norm": 0.06918945937199074, "learning_rate": 0.00019689940295408565, "loss": 0.8599, "step": 27990 }, { "epoch": 0.49158166400393266, "grad_norm": 0.08755967155698391, "learning_rate": 0.00019689663528060393, "loss": 0.8567, "step": 28000 }, { "epoch": 0.4917572288839341, "grad_norm": 0.07434169871817878, "learning_rate": 0.00019689386639209347, "loss": 0.8525, "step": 28010 }, { "epoch": 0.49193279376393545, "grad_norm": 0.06072088719711004, "learning_rate": 0.00019689109628858936, "loss": 0.8628, "step": 28020 }, { "epoch": 0.49210835864393687, "grad_norm": 0.07200055159366285, "learning_rate": 0.0001968883249701267, "loss": 0.8508, "step": 28030 }, { "epoch": 0.4922839235239383, "grad_norm": 0.08129810905823791, "learning_rate": 0.00019688555243674055, "loss": 0.857, "step": 28040 }, { "epoch": 0.49245948840393966, "grad_norm": 0.076158473551463, "learning_rate": 0.0001968827786884661, "loss": 0.8522, "step": 28050 }, { "epoch": 0.4926350532839411, "grad_norm": 0.07272978604364178, "learning_rate": 0.00019688000372533842, "loss": 0.8523, "step": 28060 }, { "epoch": 0.4928106181639425, "grad_norm": 0.07464715123310973, "learning_rate": 0.00019687722754739277, "loss": 0.8479, "step": 28070 }, { "epoch": 0.49298618304394387, "grad_norm": 0.051572127263426, "learning_rate": 0.00019687445015466426, "loss": 0.8656, "step": 28080 }, { "epoch": 0.4931617479239453, "grad_norm": 0.06988047618828351, "learning_rate": 0.00019687167154718808, "loss": 0.8563, "step": 28090 }, { "epoch": 0.4933373128039467, "grad_norm": 0.12961497372747008, "learning_rate": 0.00019686889172499948, "loss": 0.8505, "step": 28100 }, { "epoch": 0.4935128776839481, "grad_norm": 0.08134407597794163, "learning_rate": 0.00019686611068813362, "loss": 0.8475, "step": 28110 }, { "epoch": 0.4936884425639495, "grad_norm": 0.08796502108985815, "learning_rate": 0.00019686332843662577, "loss": 0.8603, "step": 28120 }, { "epoch": 0.4938640074439509, "grad_norm": 0.05270731126698327, "learning_rate": 0.00019686054497051116, "loss": 0.8589, "step": 28130 }, { "epoch": 0.4940395723239523, "grad_norm": 0.08023002171770685, "learning_rate": 0.00019685776028982506, "loss": 0.8511, "step": 28140 }, { "epoch": 0.4942151372039537, "grad_norm": 0.09513636342523608, "learning_rate": 0.00019685497439460284, "loss": 0.8591, "step": 28150 }, { "epoch": 0.49439070208395514, "grad_norm": 0.09196266949727114, "learning_rate": 0.00019685218728487965, "loss": 0.8582, "step": 28160 }, { "epoch": 0.49456626696395656, "grad_norm": 0.08144858461720979, "learning_rate": 0.00019684939896069093, "loss": 0.8538, "step": 28170 }, { "epoch": 0.4947418318439579, "grad_norm": 0.05386335071486513, "learning_rate": 0.00019684660942207192, "loss": 0.8566, "step": 28180 }, { "epoch": 0.49491739672395935, "grad_norm": 0.0635646125774291, "learning_rate": 0.00019684381866905799, "loss": 0.852, "step": 28190 }, { "epoch": 0.49509296160396077, "grad_norm": 0.06157510930610143, "learning_rate": 0.00019684102670168455, "loss": 0.8519, "step": 28200 }, { "epoch": 0.49526852648396213, "grad_norm": 0.09124931737547673, "learning_rate": 0.00019683823351998692, "loss": 0.8608, "step": 28210 }, { "epoch": 0.49544409136396356, "grad_norm": 0.0785034765854294, "learning_rate": 0.00019683543912400048, "loss": 0.8577, "step": 28220 }, { "epoch": 0.495619656243965, "grad_norm": 0.06061286037670452, "learning_rate": 0.0001968326435137607, "loss": 0.8649, "step": 28230 }, { "epoch": 0.49579522112396635, "grad_norm": 0.05003654296561814, "learning_rate": 0.00019682984668930293, "loss": 0.8526, "step": 28240 }, { "epoch": 0.49597078600396777, "grad_norm": 0.1003020117510621, "learning_rate": 0.00019682704865066263, "loss": 0.8532, "step": 28250 }, { "epoch": 0.4961463508839692, "grad_norm": 0.0743356000140512, "learning_rate": 0.0001968242493978753, "loss": 0.8498, "step": 28260 }, { "epoch": 0.49632191576397056, "grad_norm": 0.07484948549150736, "learning_rate": 0.00019682144893097634, "loss": 0.8569, "step": 28270 }, { "epoch": 0.496497480643972, "grad_norm": 0.047856338197692215, "learning_rate": 0.00019681864725000126, "loss": 0.8632, "step": 28280 }, { "epoch": 0.4966730455239734, "grad_norm": 0.05817185207131404, "learning_rate": 0.0001968158443549855, "loss": 0.857, "step": 28290 }, { "epoch": 0.49684861040397477, "grad_norm": 0.06772494924207881, "learning_rate": 0.0001968130402459647, "loss": 0.8576, "step": 28300 }, { "epoch": 0.4970241752839762, "grad_norm": 0.09495119743544886, "learning_rate": 0.0001968102349229743, "loss": 0.8505, "step": 28310 }, { "epoch": 0.4971997401639776, "grad_norm": 0.08529626702908841, "learning_rate": 0.00019680742838604987, "loss": 0.8576, "step": 28320 }, { "epoch": 0.497375305043979, "grad_norm": 0.0642344918507555, "learning_rate": 0.00019680462063522696, "loss": 0.86, "step": 28330 }, { "epoch": 0.4975508699239804, "grad_norm": 0.054977266448035134, "learning_rate": 0.00019680181167054114, "loss": 0.8504, "step": 28340 }, { "epoch": 0.4977264348039818, "grad_norm": 0.07306428823664031, "learning_rate": 0.000196799001492028, "loss": 0.8528, "step": 28350 }, { "epoch": 0.49790199968398324, "grad_norm": 0.0920889170344716, "learning_rate": 0.00019679619009972313, "loss": 0.8635, "step": 28360 }, { "epoch": 0.4980775645639846, "grad_norm": 0.0765013935219488, "learning_rate": 0.00019679337749366224, "loss": 0.8551, "step": 28370 }, { "epoch": 0.49825312944398603, "grad_norm": 0.06356673850019326, "learning_rate": 0.0001967905636738808, "loss": 0.8542, "step": 28380 }, { "epoch": 0.49842869432398745, "grad_norm": 0.0571872077185213, "learning_rate": 0.00019678774864041463, "loss": 0.8645, "step": 28390 }, { "epoch": 0.4986042592039888, "grad_norm": 0.07088453104304986, "learning_rate": 0.00019678493239329932, "loss": 0.8545, "step": 28400 }, { "epoch": 0.49877982408399024, "grad_norm": 0.07034833057149031, "learning_rate": 0.00019678211493257057, "loss": 0.8504, "step": 28410 }, { "epoch": 0.49895538896399166, "grad_norm": 0.06082019132869783, "learning_rate": 0.00019677929625826402, "loss": 0.8522, "step": 28420 }, { "epoch": 0.49913095384399303, "grad_norm": 0.07014849848342347, "learning_rate": 0.0001967764763704155, "loss": 0.8541, "step": 28430 }, { "epoch": 0.49930651872399445, "grad_norm": 0.0695813818672704, "learning_rate": 0.0001967736552690606, "loss": 0.8549, "step": 28440 }, { "epoch": 0.4994820836039959, "grad_norm": 0.061296807033246845, "learning_rate": 0.00019677083295423518, "loss": 0.8569, "step": 28450 }, { "epoch": 0.49965764848399724, "grad_norm": 0.05164469332146675, "learning_rate": 0.00019676800942597492, "loss": 0.856, "step": 28460 }, { "epoch": 0.49983321336399866, "grad_norm": 0.06187045413124916, "learning_rate": 0.00019676518468431561, "loss": 0.8566, "step": 28470 }, { "epoch": 0.500008778244, "grad_norm": 0.07080208278739544, "learning_rate": 0.0001967623587292931, "loss": 0.858, "step": 28480 }, { "epoch": 0.5001843431240015, "grad_norm": 0.06924266251622617, "learning_rate": 0.00019675953156094318, "loss": 0.8522, "step": 28490 }, { "epoch": 0.5003599080040029, "grad_norm": 0.07479032458144634, "learning_rate": 0.0001967567031793016, "loss": 0.8476, "step": 28500 }, { "epoch": 0.5005354728840042, "grad_norm": 0.0697578274623891, "learning_rate": 0.00019675387358440423, "loss": 0.8545, "step": 28510 }, { "epoch": 0.5007110377640057, "grad_norm": 0.07392040612534022, "learning_rate": 0.00019675104277628693, "loss": 0.8532, "step": 28520 }, { "epoch": 0.5008866026440071, "grad_norm": 0.053056300256200964, "learning_rate": 0.00019674821075498557, "loss": 0.8567, "step": 28530 }, { "epoch": 0.5010621675240085, "grad_norm": 0.06085034952845698, "learning_rate": 0.00019674537752053606, "loss": 0.8531, "step": 28540 }, { "epoch": 0.5012377324040099, "grad_norm": 0.08072573550764119, "learning_rate": 0.00019674254307297423, "loss": 0.8544, "step": 28550 }, { "epoch": 0.5014132972840113, "grad_norm": 0.052338672417524, "learning_rate": 0.00019673970741233607, "loss": 0.8577, "step": 28560 }, { "epoch": 0.5015888621640127, "grad_norm": 0.08874564303887145, "learning_rate": 0.00019673687053865742, "loss": 0.859, "step": 28570 }, { "epoch": 0.5017644270440141, "grad_norm": 0.05031177349109472, "learning_rate": 0.00019673403245197427, "loss": 0.8612, "step": 28580 }, { "epoch": 0.5019399919240155, "grad_norm": 0.07104443204574167, "learning_rate": 0.00019673119315232265, "loss": 0.8486, "step": 28590 }, { "epoch": 0.5021155568040169, "grad_norm": 0.07150738079363231, "learning_rate": 0.00019672835263973842, "loss": 0.8614, "step": 28600 }, { "epoch": 0.5022911216840183, "grad_norm": 0.10211901546966073, "learning_rate": 0.0001967255109142576, "loss": 0.8521, "step": 28610 }, { "epoch": 0.5024666865640197, "grad_norm": 0.05247101739565363, "learning_rate": 0.00019672266797591626, "loss": 0.8528, "step": 28620 }, { "epoch": 0.5026422514440212, "grad_norm": 0.056019336449479755, "learning_rate": 0.00019671982382475032, "loss": 0.8495, "step": 28630 }, { "epoch": 0.5028178163240226, "grad_norm": 0.06005855638448258, "learning_rate": 0.0001967169784607959, "loss": 0.8453, "step": 28640 }, { "epoch": 0.5029933812040239, "grad_norm": 0.06388575917662044, "learning_rate": 0.000196714131884089, "loss": 0.8582, "step": 28650 }, { "epoch": 0.5031689460840254, "grad_norm": 0.06636219963542278, "learning_rate": 0.00019671128409466573, "loss": 0.8516, "step": 28660 }, { "epoch": 0.5033445109640268, "grad_norm": 0.07276744060792363, "learning_rate": 0.0001967084350925621, "loss": 0.8526, "step": 28670 }, { "epoch": 0.5035200758440281, "grad_norm": 0.06609971040239032, "learning_rate": 0.0001967055848778143, "loss": 0.857, "step": 28680 }, { "epoch": 0.5036956407240296, "grad_norm": 0.0539923723641777, "learning_rate": 0.0001967027334504584, "loss": 0.8623, "step": 28690 }, { "epoch": 0.503871205604031, "grad_norm": 0.06104817812943375, "learning_rate": 0.0001966998808105305, "loss": 0.8606, "step": 28700 }, { "epoch": 0.5040467704840323, "grad_norm": 0.05873168959172005, "learning_rate": 0.00019669702695806676, "loss": 0.8607, "step": 28710 }, { "epoch": 0.5042223353640338, "grad_norm": 0.05607111484339036, "learning_rate": 0.0001966941718931034, "loss": 0.8607, "step": 28720 }, { "epoch": 0.5043979002440352, "grad_norm": 0.10451295084450993, "learning_rate": 0.0001966913156156765, "loss": 0.8548, "step": 28730 }, { "epoch": 0.5045734651240366, "grad_norm": 0.08709305290308908, "learning_rate": 0.0001966884581258223, "loss": 0.8511, "step": 28740 }, { "epoch": 0.504749030004038, "grad_norm": 0.07016128947802401, "learning_rate": 0.00019668559942357698, "loss": 0.8521, "step": 28750 }, { "epoch": 0.5049245948840394, "grad_norm": 0.05707602524864542, "learning_rate": 0.0001966827395089768, "loss": 0.863, "step": 28760 }, { "epoch": 0.5051001597640408, "grad_norm": 0.05913228237151569, "learning_rate": 0.00019667987838205799, "loss": 0.8518, "step": 28770 }, { "epoch": 0.5052757246440422, "grad_norm": 0.052697905057215604, "learning_rate": 0.00019667701604285677, "loss": 0.8579, "step": 28780 }, { "epoch": 0.5054512895240436, "grad_norm": 0.08582442836068573, "learning_rate": 0.0001966741524914094, "loss": 0.8536, "step": 28790 }, { "epoch": 0.505626854404045, "grad_norm": 0.06950675657062483, "learning_rate": 0.0001966712877277522, "loss": 0.8558, "step": 28800 }, { "epoch": 0.5058024192840465, "grad_norm": 0.06060124014249393, "learning_rate": 0.00019666842175192147, "loss": 0.8545, "step": 28810 }, { "epoch": 0.5059779841640478, "grad_norm": 0.06256384173547858, "learning_rate": 0.0001966655545639535, "loss": 0.8571, "step": 28820 }, { "epoch": 0.5061535490440492, "grad_norm": 0.07757160036396411, "learning_rate": 0.00019666268616388454, "loss": 0.8552, "step": 28830 }, { "epoch": 0.5063291139240507, "grad_norm": 0.08470328858370337, "learning_rate": 0.00019665981655175106, "loss": 0.8562, "step": 28840 }, { "epoch": 0.506504678804052, "grad_norm": 0.05498181256171088, "learning_rate": 0.0001966569457275894, "loss": 0.8587, "step": 28850 }, { "epoch": 0.5066802436840534, "grad_norm": 0.07268950193000177, "learning_rate": 0.0001966540736914359, "loss": 0.8567, "step": 28860 }, { "epoch": 0.5068558085640549, "grad_norm": 0.07515760630461223, "learning_rate": 0.00019665120044332692, "loss": 0.8579, "step": 28870 }, { "epoch": 0.5070313734440562, "grad_norm": 0.04815890347876562, "learning_rate": 0.00019664832598329893, "loss": 0.8578, "step": 28880 }, { "epoch": 0.5072069383240576, "grad_norm": 0.06363460532772033, "learning_rate": 0.00019664545031138831, "loss": 0.8499, "step": 28890 }, { "epoch": 0.5073825032040591, "grad_norm": 0.0693817560485975, "learning_rate": 0.0001966425734276315, "loss": 0.8513, "step": 28900 }, { "epoch": 0.5075580680840605, "grad_norm": 0.06500542995974883, "learning_rate": 0.00019663969533206496, "loss": 0.8609, "step": 28910 }, { "epoch": 0.5077336329640618, "grad_norm": 0.06763513668813761, "learning_rate": 0.00019663681602472512, "loss": 0.8584, "step": 28920 }, { "epoch": 0.5079091978440633, "grad_norm": 0.07890750280203841, "learning_rate": 0.00019663393550564852, "loss": 0.8565, "step": 28930 }, { "epoch": 0.5080847627240647, "grad_norm": 0.08023626402463954, "learning_rate": 0.00019663105377487164, "loss": 0.8604, "step": 28940 }, { "epoch": 0.508260327604066, "grad_norm": 0.060796979829604914, "learning_rate": 0.00019662817083243098, "loss": 0.8572, "step": 28950 }, { "epoch": 0.5084358924840675, "grad_norm": 0.08642459421522944, "learning_rate": 0.00019662528667836304, "loss": 0.8513, "step": 28960 }, { "epoch": 0.5086114573640689, "grad_norm": 0.05317917639198606, "learning_rate": 0.00019662240131270442, "loss": 0.8541, "step": 28970 }, { "epoch": 0.5087870222440702, "grad_norm": 0.058196154945924866, "learning_rate": 0.00019661951473549163, "loss": 0.8621, "step": 28980 }, { "epoch": 0.5089625871240717, "grad_norm": 0.07306531977394666, "learning_rate": 0.0001966166269467613, "loss": 0.8586, "step": 28990 }, { "epoch": 0.5091381520040731, "grad_norm": 0.057092530454340304, "learning_rate": 0.00019661373794654995, "loss": 0.8596, "step": 29000 }, { "epoch": 0.5093137168840746, "grad_norm": 0.07431827672749158, "learning_rate": 0.0001966108477348942, "loss": 0.8559, "step": 29010 }, { "epoch": 0.5094892817640759, "grad_norm": 0.08475306322635409, "learning_rate": 0.0001966079563118307, "loss": 0.8561, "step": 29020 }, { "epoch": 0.5096648466440773, "grad_norm": 0.06745497913107734, "learning_rate": 0.00019660506367739607, "loss": 0.8577, "step": 29030 }, { "epoch": 0.5098404115240788, "grad_norm": 0.10322730831876117, "learning_rate": 0.000196602169831627, "loss": 0.8573, "step": 29040 }, { "epoch": 0.5100159764040801, "grad_norm": 0.0575897692328454, "learning_rate": 0.00019659927477456003, "loss": 0.855, "step": 29050 }, { "epoch": 0.5101915412840815, "grad_norm": 0.05472248268909245, "learning_rate": 0.00019659637850623203, "loss": 0.8524, "step": 29060 }, { "epoch": 0.510367106164083, "grad_norm": 0.05995420307313575, "learning_rate": 0.00019659348102667951, "loss": 0.8648, "step": 29070 }, { "epoch": 0.5105426710440844, "grad_norm": 0.07341081443038537, "learning_rate": 0.00019659058233593933, "loss": 0.8601, "step": 29080 }, { "epoch": 0.5107182359240857, "grad_norm": 0.07080951659032095, "learning_rate": 0.0001965876824340481, "loss": 0.8434, "step": 29090 }, { "epoch": 0.5108938008040872, "grad_norm": 0.07071285257480808, "learning_rate": 0.00019658478132104264, "loss": 0.8471, "step": 29100 }, { "epoch": 0.5110693656840886, "grad_norm": 0.09505061167855376, "learning_rate": 0.00019658187899695967, "loss": 0.8484, "step": 29110 }, { "epoch": 0.5112449305640899, "grad_norm": 0.0820942882120441, "learning_rate": 0.00019657897546183597, "loss": 0.8474, "step": 29120 }, { "epoch": 0.5114204954440914, "grad_norm": 0.062400303136284654, "learning_rate": 0.00019657607071570836, "loss": 0.864, "step": 29130 }, { "epoch": 0.5115960603240928, "grad_norm": 0.07034939096987408, "learning_rate": 0.00019657316475861362, "loss": 0.8592, "step": 29140 }, { "epoch": 0.5117716252040941, "grad_norm": 0.0687911901552609, "learning_rate": 0.0001965702575905885, "loss": 0.8526, "step": 29150 }, { "epoch": 0.5119471900840956, "grad_norm": 0.0577520726358231, "learning_rate": 0.00019656734921166995, "loss": 0.8552, "step": 29160 }, { "epoch": 0.512122754964097, "grad_norm": 0.06779404537847444, "learning_rate": 0.00019656443962189478, "loss": 0.8576, "step": 29170 }, { "epoch": 0.5122983198440983, "grad_norm": 0.056556139365457336, "learning_rate": 0.00019656152882129984, "loss": 0.8479, "step": 29180 }, { "epoch": 0.5124738847240998, "grad_norm": 0.05799334908046857, "learning_rate": 0.000196558616809922, "loss": 0.8614, "step": 29190 }, { "epoch": 0.5126494496041012, "grad_norm": 0.072061132236632, "learning_rate": 0.0001965557035877982, "loss": 0.8551, "step": 29200 }, { "epoch": 0.5128250144841026, "grad_norm": 0.0621255207250955, "learning_rate": 0.00019655278915496526, "loss": 0.8544, "step": 29210 }, { "epoch": 0.513000579364104, "grad_norm": 0.07131089839899371, "learning_rate": 0.00019654987351146023, "loss": 0.8572, "step": 29220 }, { "epoch": 0.5131761442441054, "grad_norm": 0.08437397224596317, "learning_rate": 0.00019654695665731996, "loss": 0.8485, "step": 29230 }, { "epoch": 0.5133517091241068, "grad_norm": 0.0870093742261487, "learning_rate": 0.00019654403859258146, "loss": 0.8558, "step": 29240 }, { "epoch": 0.5135272740041082, "grad_norm": 0.06494989286053843, "learning_rate": 0.00019654111931728164, "loss": 0.8536, "step": 29250 }, { "epoch": 0.5137028388841096, "grad_norm": 0.059563080023402645, "learning_rate": 0.0001965381988314576, "loss": 0.8616, "step": 29260 }, { "epoch": 0.513878403764111, "grad_norm": 0.058950337559111624, "learning_rate": 0.0001965352771351462, "loss": 0.8474, "step": 29270 }, { "epoch": 0.5140539686441125, "grad_norm": 0.06257656365168376, "learning_rate": 0.00019653235422838458, "loss": 0.8568, "step": 29280 }, { "epoch": 0.5142295335241138, "grad_norm": 0.07248588077789765, "learning_rate": 0.00019652943011120967, "loss": 0.8531, "step": 29290 }, { "epoch": 0.5144050984041152, "grad_norm": 0.06747371814498279, "learning_rate": 0.00019652650478365862, "loss": 0.8489, "step": 29300 }, { "epoch": 0.5145806632841167, "grad_norm": 0.06494721004421337, "learning_rate": 0.00019652357824576845, "loss": 0.8592, "step": 29310 }, { "epoch": 0.514756228164118, "grad_norm": 0.07534076794443097, "learning_rate": 0.0001965206504975762, "loss": 0.8531, "step": 29320 }, { "epoch": 0.5149317930441194, "grad_norm": 0.05588054727341398, "learning_rate": 0.000196517721539119, "loss": 0.8573, "step": 29330 }, { "epoch": 0.5151073579241209, "grad_norm": 0.07040679177811111, "learning_rate": 0.00019651479137043398, "loss": 0.8549, "step": 29340 }, { "epoch": 0.5152829228041222, "grad_norm": 0.06258757124726429, "learning_rate": 0.0001965118599915583, "loss": 0.848, "step": 29350 }, { "epoch": 0.5154584876841237, "grad_norm": 0.0743451128721037, "learning_rate": 0.00019650892740252895, "loss": 0.8619, "step": 29360 }, { "epoch": 0.5156340525641251, "grad_norm": 0.0625483912213372, "learning_rate": 0.00019650599360338322, "loss": 0.8502, "step": 29370 }, { "epoch": 0.5158096174441265, "grad_norm": 0.06596670612278381, "learning_rate": 0.00019650305859415828, "loss": 0.8517, "step": 29380 }, { "epoch": 0.5159851823241279, "grad_norm": 0.06149868108193842, "learning_rate": 0.00019650012237489126, "loss": 0.8593, "step": 29390 }, { "epoch": 0.5161607472041293, "grad_norm": 0.06414077129217328, "learning_rate": 0.0001964971849456194, "loss": 0.8573, "step": 29400 }, { "epoch": 0.5163363120841307, "grad_norm": 0.05483397338438271, "learning_rate": 0.00019649424630637988, "loss": 0.8667, "step": 29410 }, { "epoch": 0.5165118769641321, "grad_norm": 0.06486684862629184, "learning_rate": 0.00019649130645720998, "loss": 0.8464, "step": 29420 }, { "epoch": 0.5166874418441335, "grad_norm": 0.06471348189800105, "learning_rate": 0.00019648836539814691, "loss": 0.8532, "step": 29430 }, { "epoch": 0.5168630067241349, "grad_norm": 0.06002676805311119, "learning_rate": 0.00019648542312922799, "loss": 0.8608, "step": 29440 }, { "epoch": 0.5170385716041364, "grad_norm": 0.0729591386512088, "learning_rate": 0.0001964824796504904, "loss": 0.8536, "step": 29450 }, { "epoch": 0.5172141364841377, "grad_norm": 0.06549644377653978, "learning_rate": 0.00019647953496197153, "loss": 0.8535, "step": 29460 }, { "epoch": 0.5173897013641391, "grad_norm": 0.07681369193735942, "learning_rate": 0.00019647658906370865, "loss": 0.859, "step": 29470 }, { "epoch": 0.5175652662441406, "grad_norm": 0.05636037831782027, "learning_rate": 0.0001964736419557391, "loss": 0.8605, "step": 29480 }, { "epoch": 0.5177408311241419, "grad_norm": 0.05264371463297658, "learning_rate": 0.0001964706936381002, "loss": 0.8548, "step": 29490 }, { "epoch": 0.5179163960041433, "grad_norm": 0.06909741107248228, "learning_rate": 0.0001964677441108293, "loss": 0.8569, "step": 29500 }, { "epoch": 0.5180919608841448, "grad_norm": 0.08323226397303046, "learning_rate": 0.00019646479337396378, "loss": 0.8614, "step": 29510 }, { "epoch": 0.5182675257641461, "grad_norm": 0.06503661042138759, "learning_rate": 0.00019646184142754107, "loss": 0.8495, "step": 29520 }, { "epoch": 0.5184430906441475, "grad_norm": 0.05664063271067172, "learning_rate": 0.0001964588882715985, "loss": 0.8568, "step": 29530 }, { "epoch": 0.518618655524149, "grad_norm": 0.06746343422238885, "learning_rate": 0.0001964559339061735, "loss": 0.847, "step": 29540 }, { "epoch": 0.5187942204041504, "grad_norm": 0.0603684990626405, "learning_rate": 0.0001964529783313036, "loss": 0.855, "step": 29550 }, { "epoch": 0.5189697852841517, "grad_norm": 0.06908034842187188, "learning_rate": 0.0001964500215470261, "loss": 0.8478, "step": 29560 }, { "epoch": 0.5191453501641532, "grad_norm": 0.04839023508258588, "learning_rate": 0.00019644706355337855, "loss": 0.8545, "step": 29570 }, { "epoch": 0.5193209150441546, "grad_norm": 0.0787462272429842, "learning_rate": 0.00019644410435039843, "loss": 0.8535, "step": 29580 }, { "epoch": 0.5194964799241559, "grad_norm": 0.07626882626330007, "learning_rate": 0.00019644114393812322, "loss": 0.8545, "step": 29590 }, { "epoch": 0.5196720448041574, "grad_norm": 0.0729947224820148, "learning_rate": 0.0001964381823165904, "loss": 0.8582, "step": 29600 }, { "epoch": 0.5198476096841588, "grad_norm": 0.0686030957666668, "learning_rate": 0.00019643521948583756, "loss": 0.8505, "step": 29610 }, { "epoch": 0.5200231745641601, "grad_norm": 0.053398033909572984, "learning_rate": 0.0001964322554459022, "loss": 0.8585, "step": 29620 }, { "epoch": 0.5201987394441616, "grad_norm": 0.06755765140566246, "learning_rate": 0.0001964292901968218, "loss": 0.8583, "step": 29630 }, { "epoch": 0.520374304324163, "grad_norm": 0.06553384308571403, "learning_rate": 0.00019642632373863405, "loss": 0.8566, "step": 29640 }, { "epoch": 0.5205498692041644, "grad_norm": 0.059842833668989986, "learning_rate": 0.0001964233560713765, "loss": 0.8625, "step": 29650 }, { "epoch": 0.5207254340841658, "grad_norm": 0.07104715243923304, "learning_rate": 0.00019642038719508672, "loss": 0.8531, "step": 29660 }, { "epoch": 0.5209009989641672, "grad_norm": 0.06124429862190087, "learning_rate": 0.00019641741710980237, "loss": 0.8532, "step": 29670 }, { "epoch": 0.5210765638441686, "grad_norm": 0.0558207752265672, "learning_rate": 0.00019641444581556107, "loss": 0.8578, "step": 29680 }, { "epoch": 0.52125212872417, "grad_norm": 0.07738167998989112, "learning_rate": 0.0001964114733124004, "loss": 0.8619, "step": 29690 }, { "epoch": 0.5214276936041714, "grad_norm": 0.056507839073384664, "learning_rate": 0.00019640849960035812, "loss": 0.8572, "step": 29700 }, { "epoch": 0.5216032584841728, "grad_norm": 0.09869250331127731, "learning_rate": 0.00019640552467947185, "loss": 0.8501, "step": 29710 }, { "epoch": 0.5217788233641742, "grad_norm": 0.0625304730553803, "learning_rate": 0.0001964025485497793, "loss": 0.8536, "step": 29720 }, { "epoch": 0.5219543882441756, "grad_norm": 0.08749242438326142, "learning_rate": 0.00019639957121131817, "loss": 0.8533, "step": 29730 }, { "epoch": 0.5221299531241771, "grad_norm": 0.05239805550305343, "learning_rate": 0.00019639659266412624, "loss": 0.8575, "step": 29740 }, { "epoch": 0.5223055180041785, "grad_norm": 0.08100977759558976, "learning_rate": 0.00019639361290824114, "loss": 0.857, "step": 29750 }, { "epoch": 0.5224810828841798, "grad_norm": 0.06140604991752547, "learning_rate": 0.00019639063194370072, "loss": 0.8603, "step": 29760 }, { "epoch": 0.5226566477641813, "grad_norm": 0.09263348759768697, "learning_rate": 0.00019638764977054264, "loss": 0.8555, "step": 29770 }, { "epoch": 0.5228322126441827, "grad_norm": 0.061244343164706755, "learning_rate": 0.0001963846663888048, "loss": 0.8556, "step": 29780 }, { "epoch": 0.523007777524184, "grad_norm": 0.06864376755632137, "learning_rate": 0.000196381681798525, "loss": 0.8526, "step": 29790 }, { "epoch": 0.5231833424041855, "grad_norm": 0.053288175408355204, "learning_rate": 0.000196378695999741, "loss": 0.857, "step": 29800 }, { "epoch": 0.5233589072841869, "grad_norm": 0.05846528181562085, "learning_rate": 0.0001963757089924906, "loss": 0.8467, "step": 29810 }, { "epoch": 0.5235344721641882, "grad_norm": 0.07747761914494161, "learning_rate": 0.0001963727207768117, "loss": 0.8422, "step": 29820 }, { "epoch": 0.5237100370441897, "grad_norm": 0.06579598774786691, "learning_rate": 0.00019636973135274215, "loss": 0.858, "step": 29830 }, { "epoch": 0.5238856019241911, "grad_norm": 0.09312063864804114, "learning_rate": 0.00019636674072031983, "loss": 0.8552, "step": 29840 }, { "epoch": 0.5240611668041925, "grad_norm": 0.08032511618278704, "learning_rate": 0.00019636374887958263, "loss": 0.8537, "step": 29850 }, { "epoch": 0.5242367316841939, "grad_norm": 0.07415918580168729, "learning_rate": 0.00019636075583056849, "loss": 0.865, "step": 29860 }, { "epoch": 0.5244122965641953, "grad_norm": 0.05753984393376343, "learning_rate": 0.00019635776157331524, "loss": 0.85, "step": 29870 }, { "epoch": 0.5245878614441967, "grad_norm": 0.05604963030753635, "learning_rate": 0.0001963547661078609, "loss": 0.8659, "step": 29880 }, { "epoch": 0.5247634263241981, "grad_norm": 0.06785437646367144, "learning_rate": 0.0001963517694342434, "loss": 0.8591, "step": 29890 }, { "epoch": 0.5249389912041995, "grad_norm": 0.09506306814963333, "learning_rate": 0.00019634877155250068, "loss": 0.8546, "step": 29900 }, { "epoch": 0.5251145560842009, "grad_norm": 0.06974750303142745, "learning_rate": 0.00019634577246267082, "loss": 0.8591, "step": 29910 }, { "epoch": 0.5252901209642024, "grad_norm": 0.06729471802471466, "learning_rate": 0.0001963427721647917, "loss": 0.8595, "step": 29920 }, { "epoch": 0.5254656858442037, "grad_norm": 0.06782281515909792, "learning_rate": 0.00019633977065890138, "loss": 0.8566, "step": 29930 }, { "epoch": 0.5256412507242051, "grad_norm": 0.06525861458042002, "learning_rate": 0.0001963367679450379, "loss": 0.8521, "step": 29940 }, { "epoch": 0.5258168156042066, "grad_norm": 0.058256550401721216, "learning_rate": 0.00019633376402323932, "loss": 0.856, "step": 29950 }, { "epoch": 0.5259923804842079, "grad_norm": 0.06186496869053203, "learning_rate": 0.00019633075889354367, "loss": 0.8598, "step": 29960 }, { "epoch": 0.5261679453642093, "grad_norm": 0.06698776523393773, "learning_rate": 0.00019632775255598898, "loss": 0.8539, "step": 29970 }, { "epoch": 0.5263435102442108, "grad_norm": 0.074650276538104, "learning_rate": 0.00019632474501061348, "loss": 0.8621, "step": 29980 }, { "epoch": 0.5265190751242121, "grad_norm": 0.056689804999970596, "learning_rate": 0.00019632173625745516, "loss": 0.8576, "step": 29990 }, { "epoch": 0.5266946400042135, "grad_norm": 0.06061149881854521, "learning_rate": 0.00019631872629655216, "loss": 0.8573, "step": 30000 }, { "epoch": 0.526870204884215, "grad_norm": 0.07078620941328169, "learning_rate": 0.00019631571512794261, "loss": 0.8547, "step": 30010 }, { "epoch": 0.5270457697642164, "grad_norm": 0.06992207484519342, "learning_rate": 0.00019631270275166472, "loss": 0.8515, "step": 30020 }, { "epoch": 0.5272213346442177, "grad_norm": 0.049878999241611315, "learning_rate": 0.00019630968916775662, "loss": 0.8561, "step": 30030 }, { "epoch": 0.5273968995242192, "grad_norm": 0.07422145303211666, "learning_rate": 0.00019630667437625648, "loss": 0.8544, "step": 30040 }, { "epoch": 0.5275724644042206, "grad_norm": 0.08524446501905307, "learning_rate": 0.0001963036583772025, "loss": 0.8527, "step": 30050 }, { "epoch": 0.5277480292842219, "grad_norm": 0.057215501819116554, "learning_rate": 0.00019630064117063292, "loss": 0.8648, "step": 30060 }, { "epoch": 0.5279235941642234, "grad_norm": 0.06796894687160122, "learning_rate": 0.00019629762275658597, "loss": 0.8472, "step": 30070 }, { "epoch": 0.5280991590442248, "grad_norm": 0.0668299732384758, "learning_rate": 0.00019629460313509985, "loss": 0.8589, "step": 30080 }, { "epoch": 0.5282747239242263, "grad_norm": 0.06565511938649976, "learning_rate": 0.00019629158230621285, "loss": 0.8599, "step": 30090 }, { "epoch": 0.5284502888042276, "grad_norm": 0.05845835196901997, "learning_rate": 0.00019628856026996324, "loss": 0.8583, "step": 30100 }, { "epoch": 0.528625853684229, "grad_norm": 0.0706525655887078, "learning_rate": 0.0001962855370263893, "loss": 0.8586, "step": 30110 }, { "epoch": 0.5288014185642305, "grad_norm": 0.10266506983966323, "learning_rate": 0.00019628251257552935, "loss": 0.8512, "step": 30120 }, { "epoch": 0.5289769834442318, "grad_norm": 0.0885022510929142, "learning_rate": 0.00019627948691742172, "loss": 0.8633, "step": 30130 }, { "epoch": 0.5291525483242332, "grad_norm": 0.09832188174023573, "learning_rate": 0.0001962764600521047, "loss": 0.857, "step": 30140 }, { "epoch": 0.5293281132042347, "grad_norm": 0.07396252400816616, "learning_rate": 0.0001962734319796167, "loss": 0.8496, "step": 30150 }, { "epoch": 0.529503678084236, "grad_norm": 0.0650703873310173, "learning_rate": 0.00019627040269999603, "loss": 0.8504, "step": 30160 }, { "epoch": 0.5296792429642374, "grad_norm": 0.06432984117066809, "learning_rate": 0.00019626737221328113, "loss": 0.851, "step": 30170 }, { "epoch": 0.5298548078442389, "grad_norm": 0.06143618294659299, "learning_rate": 0.00019626434051951038, "loss": 0.8623, "step": 30180 }, { "epoch": 0.5300303727242403, "grad_norm": 0.05380577223124103, "learning_rate": 0.00019626130761872212, "loss": 0.8573, "step": 30190 }, { "epoch": 0.5302059376042416, "grad_norm": 0.07578576334936009, "learning_rate": 0.00019625827351095487, "loss": 0.8552, "step": 30200 }, { "epoch": 0.5303815024842431, "grad_norm": 0.07081403704294216, "learning_rate": 0.00019625523819624704, "loss": 0.8544, "step": 30210 }, { "epoch": 0.5305570673642445, "grad_norm": 0.06998939231584354, "learning_rate": 0.00019625220167463706, "loss": 0.8647, "step": 30220 }, { "epoch": 0.5307326322442458, "grad_norm": 0.05728505838075256, "learning_rate": 0.00019624916394616346, "loss": 0.858, "step": 30230 }, { "epoch": 0.5309081971242473, "grad_norm": 0.057459633872014006, "learning_rate": 0.00019624612501086472, "loss": 0.8579, "step": 30240 }, { "epoch": 0.5310837620042487, "grad_norm": 0.05466761837763058, "learning_rate": 0.00019624308486877927, "loss": 0.8549, "step": 30250 }, { "epoch": 0.53125932688425, "grad_norm": 0.058019312422236666, "learning_rate": 0.00019624004351994568, "loss": 0.8593, "step": 30260 }, { "epoch": 0.5314348917642515, "grad_norm": 0.06419225351849087, "learning_rate": 0.00019623700096440253, "loss": 0.8499, "step": 30270 }, { "epoch": 0.5316104566442529, "grad_norm": 0.07475582388121765, "learning_rate": 0.0001962339572021883, "loss": 0.8598, "step": 30280 }, { "epoch": 0.5317860215242542, "grad_norm": 0.05967688844256034, "learning_rate": 0.0001962309122333416, "loss": 0.8574, "step": 30290 }, { "epoch": 0.5319615864042557, "grad_norm": 0.053410180101682234, "learning_rate": 0.00019622786605790098, "loss": 0.8519, "step": 30300 }, { "epoch": 0.5321371512842571, "grad_norm": 0.057012365140871726, "learning_rate": 0.00019622481867590503, "loss": 0.8552, "step": 30310 }, { "epoch": 0.5323127161642585, "grad_norm": 0.07147933977304959, "learning_rate": 0.00019622177008739243, "loss": 0.8542, "step": 30320 }, { "epoch": 0.5324882810442599, "grad_norm": 0.07653244976396063, "learning_rate": 0.0001962187202924017, "loss": 0.856, "step": 30330 }, { "epoch": 0.5326638459242613, "grad_norm": 0.08102451666489424, "learning_rate": 0.00019621566929097154, "loss": 0.8543, "step": 30340 }, { "epoch": 0.5328394108042627, "grad_norm": 0.06737488582096392, "learning_rate": 0.00019621261708314063, "loss": 0.859, "step": 30350 }, { "epoch": 0.5330149756842641, "grad_norm": 0.09027076721190101, "learning_rate": 0.00019620956366894757, "loss": 0.8566, "step": 30360 }, { "epoch": 0.5331905405642655, "grad_norm": 0.0672266587577479, "learning_rate": 0.00019620650904843113, "loss": 0.8581, "step": 30370 }, { "epoch": 0.5333661054442669, "grad_norm": 0.06824188088027007, "learning_rate": 0.00019620345322162995, "loss": 0.8496, "step": 30380 }, { "epoch": 0.5335416703242684, "grad_norm": 0.060706502560839856, "learning_rate": 0.00019620039618858278, "loss": 0.8591, "step": 30390 }, { "epoch": 0.5337172352042697, "grad_norm": 0.06759943096142017, "learning_rate": 0.00019619733794932836, "loss": 0.8565, "step": 30400 }, { "epoch": 0.5338928000842711, "grad_norm": 0.061694404328551264, "learning_rate": 0.00019619427850390542, "loss": 0.8606, "step": 30410 }, { "epoch": 0.5340683649642726, "grad_norm": 0.06231748044261503, "learning_rate": 0.0001961912178523527, "loss": 0.8553, "step": 30420 }, { "epoch": 0.5342439298442739, "grad_norm": 0.06729876285049984, "learning_rate": 0.00019618815599470904, "loss": 0.8565, "step": 30430 }, { "epoch": 0.5344194947242753, "grad_norm": 0.05956723198355466, "learning_rate": 0.0001961850929310132, "loss": 0.8541, "step": 30440 }, { "epoch": 0.5345950596042768, "grad_norm": 0.06320112852304388, "learning_rate": 0.00019618202866130397, "loss": 0.8584, "step": 30450 }, { "epoch": 0.5347706244842781, "grad_norm": 0.0815508741179819, "learning_rate": 0.0001961789631856202, "loss": 0.8506, "step": 30460 }, { "epoch": 0.5349461893642796, "grad_norm": 0.07613068479830905, "learning_rate": 0.00019617589650400074, "loss": 0.8582, "step": 30470 }, { "epoch": 0.535121754244281, "grad_norm": 0.07708838474025331, "learning_rate": 0.0001961728286164844, "loss": 0.8587, "step": 30480 }, { "epoch": 0.5352973191242824, "grad_norm": 0.06766788017720894, "learning_rate": 0.00019616975952311012, "loss": 0.8545, "step": 30490 }, { "epoch": 0.5354728840042838, "grad_norm": 0.05073342521741752, "learning_rate": 0.00019616668922391674, "loss": 0.8557, "step": 30500 }, { "epoch": 0.5356484488842852, "grad_norm": 0.06159435314787109, "learning_rate": 0.00019616361771894315, "loss": 0.8606, "step": 30510 }, { "epoch": 0.5358240137642866, "grad_norm": 0.06280479216584198, "learning_rate": 0.0001961605450082283, "loss": 0.8538, "step": 30520 }, { "epoch": 0.535999578644288, "grad_norm": 0.06705575635542556, "learning_rate": 0.0001961574710918111, "loss": 0.8602, "step": 30530 }, { "epoch": 0.5361751435242894, "grad_norm": 0.05553967410853268, "learning_rate": 0.00019615439596973048, "loss": 0.8528, "step": 30540 }, { "epoch": 0.5363507084042908, "grad_norm": 0.10522535807607154, "learning_rate": 0.00019615131964202545, "loss": 0.8495, "step": 30550 }, { "epoch": 0.5365262732842923, "grad_norm": 0.06779916831188469, "learning_rate": 0.000196148242108735, "loss": 0.8565, "step": 30560 }, { "epoch": 0.5367018381642936, "grad_norm": 0.06841085906933309, "learning_rate": 0.00019614516336989804, "loss": 0.859, "step": 30570 }, { "epoch": 0.536877403044295, "grad_norm": 0.07590716610877227, "learning_rate": 0.00019614208342555364, "loss": 0.8464, "step": 30580 }, { "epoch": 0.5370529679242965, "grad_norm": 0.0534862862559443, "learning_rate": 0.0001961390022757408, "loss": 0.8525, "step": 30590 }, { "epoch": 0.5372285328042978, "grad_norm": 0.05944938344393981, "learning_rate": 0.0001961359199204986, "loss": 0.8605, "step": 30600 }, { "epoch": 0.5374040976842992, "grad_norm": 0.06265636178463135, "learning_rate": 0.00019613283635986603, "loss": 0.8582, "step": 30610 }, { "epoch": 0.5375796625643007, "grad_norm": 0.06544692966884402, "learning_rate": 0.0001961297515938822, "loss": 0.8565, "step": 30620 }, { "epoch": 0.537755227444302, "grad_norm": 0.09928856325097558, "learning_rate": 0.00019612666562258618, "loss": 0.8549, "step": 30630 }, { "epoch": 0.5379307923243034, "grad_norm": 0.057894183102515484, "learning_rate": 0.00019612357844601708, "loss": 0.8524, "step": 30640 }, { "epoch": 0.5381063572043049, "grad_norm": 0.04813293914371265, "learning_rate": 0.00019612049006421402, "loss": 0.85, "step": 30650 }, { "epoch": 0.5382819220843063, "grad_norm": 0.04955068903160701, "learning_rate": 0.0001961174004772161, "loss": 0.8529, "step": 30660 }, { "epoch": 0.5384574869643076, "grad_norm": 0.062329597648845825, "learning_rate": 0.00019611430968506253, "loss": 0.8518, "step": 30670 }, { "epoch": 0.5386330518443091, "grad_norm": 0.06497113296450348, "learning_rate": 0.00019611121768779236, "loss": 0.8582, "step": 30680 }, { "epoch": 0.5388086167243105, "grad_norm": 0.061216124242441945, "learning_rate": 0.0001961081244854449, "loss": 0.847, "step": 30690 }, { "epoch": 0.5389841816043118, "grad_norm": 0.05353900989879404, "learning_rate": 0.00019610503007805923, "loss": 0.8606, "step": 30700 }, { "epoch": 0.5391597464843133, "grad_norm": 0.05783572518683303, "learning_rate": 0.00019610193446567458, "loss": 0.8602, "step": 30710 }, { "epoch": 0.5393353113643147, "grad_norm": 0.056585790841109485, "learning_rate": 0.00019609883764833027, "loss": 0.8557, "step": 30720 }, { "epoch": 0.539510876244316, "grad_norm": 0.05221170562159371, "learning_rate": 0.00019609573962606542, "loss": 0.8523, "step": 30730 }, { "epoch": 0.5396864411243175, "grad_norm": 0.05834363663663778, "learning_rate": 0.00019609264039891934, "loss": 0.8512, "step": 30740 }, { "epoch": 0.5398620060043189, "grad_norm": 0.0809738568144274, "learning_rate": 0.00019608953996693126, "loss": 0.8556, "step": 30750 }, { "epoch": 0.5400375708843203, "grad_norm": 0.0696679952259909, "learning_rate": 0.00019608643833014051, "loss": 0.8551, "step": 30760 }, { "epoch": 0.5402131357643217, "grad_norm": 0.06414910892818133, "learning_rate": 0.00019608333548858632, "loss": 0.8562, "step": 30770 }, { "epoch": 0.5403887006443231, "grad_norm": 0.05473219230677461, "learning_rate": 0.0001960802314423081, "loss": 0.8621, "step": 30780 }, { "epoch": 0.5405642655243245, "grad_norm": 0.053320432735048544, "learning_rate": 0.0001960771261913451, "loss": 0.8539, "step": 30790 }, { "epoch": 0.5407398304043259, "grad_norm": 0.06235709531183975, "learning_rate": 0.0001960740197357367, "loss": 0.8553, "step": 30800 }, { "epoch": 0.5409153952843273, "grad_norm": 0.05978238263731294, "learning_rate": 0.00019607091207552224, "loss": 0.8578, "step": 30810 }, { "epoch": 0.5410909601643288, "grad_norm": 0.06387667704979794, "learning_rate": 0.00019606780321074107, "loss": 0.8571, "step": 30820 }, { "epoch": 0.5412665250443301, "grad_norm": 0.07710736470789495, "learning_rate": 0.00019606469314143265, "loss": 0.8603, "step": 30830 }, { "epoch": 0.5414420899243315, "grad_norm": 0.06521402622978321, "learning_rate": 0.00019606158186763635, "loss": 0.8526, "step": 30840 }, { "epoch": 0.541617654804333, "grad_norm": 0.06252391322302357, "learning_rate": 0.0001960584693893916, "loss": 0.8559, "step": 30850 }, { "epoch": 0.5417932196843344, "grad_norm": 0.061241669425140914, "learning_rate": 0.00019605535570673782, "loss": 0.8604, "step": 30860 }, { "epoch": 0.5419687845643357, "grad_norm": 0.06626495373155282, "learning_rate": 0.00019605224081971446, "loss": 0.8569, "step": 30870 }, { "epoch": 0.5421443494443372, "grad_norm": 0.06358856057575256, "learning_rate": 0.00019604912472836097, "loss": 0.8563, "step": 30880 }, { "epoch": 0.5423199143243386, "grad_norm": 0.09675419054697534, "learning_rate": 0.00019604600743271686, "loss": 0.8613, "step": 30890 }, { "epoch": 0.5424954792043399, "grad_norm": 0.059113408003792355, "learning_rate": 0.00019604288893282164, "loss": 0.8547, "step": 30900 }, { "epoch": 0.5426710440843414, "grad_norm": 0.06022339738180512, "learning_rate": 0.00019603976922871478, "loss": 0.8612, "step": 30910 }, { "epoch": 0.5428466089643428, "grad_norm": 0.056798099601534854, "learning_rate": 0.00019603664832043587, "loss": 0.851, "step": 30920 }, { "epoch": 0.5430221738443441, "grad_norm": 0.056533749081201935, "learning_rate": 0.00019603352620802436, "loss": 0.8486, "step": 30930 }, { "epoch": 0.5431977387243456, "grad_norm": 0.09510156735930125, "learning_rate": 0.0001960304028915199, "loss": 0.8565, "step": 30940 }, { "epoch": 0.543373303604347, "grad_norm": 0.04229844551432287, "learning_rate": 0.000196027278370962, "loss": 0.8603, "step": 30950 }, { "epoch": 0.5435488684843484, "grad_norm": 0.060239811817676535, "learning_rate": 0.00019602415264639028, "loss": 0.8614, "step": 30960 }, { "epoch": 0.5437244333643498, "grad_norm": 0.06762920515169867, "learning_rate": 0.00019602102571784436, "loss": 0.8472, "step": 30970 }, { "epoch": 0.5438999982443512, "grad_norm": 0.05664952702937626, "learning_rate": 0.00019601789758536377, "loss": 0.8568, "step": 30980 }, { "epoch": 0.5440755631243526, "grad_norm": 0.08823481670097164, "learning_rate": 0.00019601476824898827, "loss": 0.8516, "step": 30990 }, { "epoch": 0.544251128004354, "grad_norm": 0.05490832266266027, "learning_rate": 0.00019601163770875744, "loss": 0.8586, "step": 31000 }, { "epoch": 0.5444266928843554, "grad_norm": 0.07650527537709481, "learning_rate": 0.00019600850596471092, "loss": 0.8545, "step": 31010 }, { "epoch": 0.5446022577643568, "grad_norm": 0.0547505465974548, "learning_rate": 0.00019600537301688842, "loss": 0.8509, "step": 31020 }, { "epoch": 0.5447778226443583, "grad_norm": 0.08565909626390215, "learning_rate": 0.00019600223886532964, "loss": 0.8537, "step": 31030 }, { "epoch": 0.5449533875243596, "grad_norm": 0.07066931427355183, "learning_rate": 0.00019599910351007432, "loss": 0.8649, "step": 31040 }, { "epoch": 0.545128952404361, "grad_norm": 0.08158795817982192, "learning_rate": 0.00019599596695116215, "loss": 0.8574, "step": 31050 }, { "epoch": 0.5453045172843625, "grad_norm": 0.05932502816743643, "learning_rate": 0.00019599282918863283, "loss": 0.8603, "step": 31060 }, { "epoch": 0.5454800821643638, "grad_norm": 0.05370838824681865, "learning_rate": 0.00019598969022252617, "loss": 0.853, "step": 31070 }, { "epoch": 0.5456556470443652, "grad_norm": 0.06261855347059044, "learning_rate": 0.00019598655005288192, "loss": 0.8519, "step": 31080 }, { "epoch": 0.5458312119243667, "grad_norm": 0.054504655176170175, "learning_rate": 0.00019598340867973992, "loss": 0.8515, "step": 31090 }, { "epoch": 0.546006776804368, "grad_norm": 0.06834320859411637, "learning_rate": 0.0001959802661031399, "loss": 0.8553, "step": 31100 }, { "epoch": 0.5461823416843694, "grad_norm": 0.06619250273961198, "learning_rate": 0.00019597712232312172, "loss": 0.8577, "step": 31110 }, { "epoch": 0.5463579065643709, "grad_norm": 0.08430469435717552, "learning_rate": 0.0001959739773397252, "loss": 0.8488, "step": 31120 }, { "epoch": 0.5465334714443723, "grad_norm": 0.062691773801042, "learning_rate": 0.00019597083115299016, "loss": 0.8537, "step": 31130 }, { "epoch": 0.5467090363243736, "grad_norm": 0.07636042514329008, "learning_rate": 0.00019596768376295654, "loss": 0.8483, "step": 31140 }, { "epoch": 0.5468846012043751, "grad_norm": 0.05347562696605515, "learning_rate": 0.0001959645351696641, "loss": 0.8542, "step": 31150 }, { "epoch": 0.5470601660843765, "grad_norm": 0.08984153980795019, "learning_rate": 0.00019596138537315283, "loss": 0.8557, "step": 31160 }, { "epoch": 0.5472357309643778, "grad_norm": 0.05278466441422529, "learning_rate": 0.0001959582343734626, "loss": 0.8553, "step": 31170 }, { "epoch": 0.5474112958443793, "grad_norm": 0.06887020280374957, "learning_rate": 0.00019595508217063335, "loss": 0.8497, "step": 31180 }, { "epoch": 0.5475868607243807, "grad_norm": 0.06647247352971479, "learning_rate": 0.00019595192876470502, "loss": 0.8562, "step": 31190 }, { "epoch": 0.5477624256043822, "grad_norm": 0.06881807765364689, "learning_rate": 0.00019594877415571752, "loss": 0.8591, "step": 31200 }, { "epoch": 0.5479379904843835, "grad_norm": 0.07713915799901103, "learning_rate": 0.00019594561834371092, "loss": 0.8543, "step": 31210 }, { "epoch": 0.5481135553643849, "grad_norm": 0.07984303996443193, "learning_rate": 0.0001959424613287251, "loss": 0.8526, "step": 31220 }, { "epoch": 0.5482891202443864, "grad_norm": 0.08804034265050545, "learning_rate": 0.00019593930311080009, "loss": 0.8562, "step": 31230 }, { "epoch": 0.5484646851243877, "grad_norm": 0.06776319926171526, "learning_rate": 0.0001959361436899759, "loss": 0.8568, "step": 31240 }, { "epoch": 0.5486402500043891, "grad_norm": 0.06731456618341315, "learning_rate": 0.00019593298306629263, "loss": 0.8527, "step": 31250 }, { "epoch": 0.5488158148843906, "grad_norm": 0.07191946155220058, "learning_rate": 0.00019592982123979024, "loss": 0.851, "step": 31260 }, { "epoch": 0.5489913797643919, "grad_norm": 0.06603551674135963, "learning_rate": 0.00019592665821050882, "loss": 0.8536, "step": 31270 }, { "epoch": 0.5491669446443933, "grad_norm": 0.06994125865469039, "learning_rate": 0.0001959234939784884, "loss": 0.8555, "step": 31280 }, { "epoch": 0.5493425095243948, "grad_norm": 0.06710442787109057, "learning_rate": 0.0001959203285437692, "loss": 0.857, "step": 31290 }, { "epoch": 0.5495180744043962, "grad_norm": 0.07214762271055486, "learning_rate": 0.0001959171619063912, "loss": 0.8611, "step": 31300 }, { "epoch": 0.5496936392843975, "grad_norm": 0.07544033548932451, "learning_rate": 0.00019591399406639458, "loss": 0.8567, "step": 31310 }, { "epoch": 0.549869204164399, "grad_norm": 0.080836020629921, "learning_rate": 0.0001959108250238195, "loss": 0.8541, "step": 31320 }, { "epoch": 0.5500447690444004, "grad_norm": 0.05929326937113656, "learning_rate": 0.00019590765477870602, "loss": 0.8553, "step": 31330 }, { "epoch": 0.5502203339244017, "grad_norm": 0.06155410598352605, "learning_rate": 0.00019590448333109439, "loss": 0.8508, "step": 31340 }, { "epoch": 0.5503958988044032, "grad_norm": 0.05443807569705259, "learning_rate": 0.00019590131068102478, "loss": 0.8521, "step": 31350 }, { "epoch": 0.5505714636844046, "grad_norm": 0.06063489287317628, "learning_rate": 0.00019589813682853734, "loss": 0.856, "step": 31360 }, { "epoch": 0.5507470285644059, "grad_norm": 0.07695275882028457, "learning_rate": 0.00019589496177367235, "loss": 0.8562, "step": 31370 }, { "epoch": 0.5509225934444074, "grad_norm": 0.08067880955928376, "learning_rate": 0.00019589178551647, "loss": 0.8564, "step": 31380 }, { "epoch": 0.5510981583244088, "grad_norm": 0.06923804892431346, "learning_rate": 0.00019588860805697054, "loss": 0.8496, "step": 31390 }, { "epoch": 0.5512737232044101, "grad_norm": 0.052691206177765765, "learning_rate": 0.00019588542939521426, "loss": 0.8654, "step": 31400 }, { "epoch": 0.5514492880844116, "grad_norm": 0.06393804859254164, "learning_rate": 0.00019588224953124135, "loss": 0.8518, "step": 31410 }, { "epoch": 0.551624852964413, "grad_norm": 0.07425297745799572, "learning_rate": 0.0001958790684650922, "loss": 0.8544, "step": 31420 }, { "epoch": 0.5518004178444144, "grad_norm": 0.07324577509927807, "learning_rate": 0.00019587588619680702, "loss": 0.8629, "step": 31430 }, { "epoch": 0.5519759827244158, "grad_norm": 0.06402725019450788, "learning_rate": 0.00019587270272642621, "loss": 0.856, "step": 31440 }, { "epoch": 0.5521515476044172, "grad_norm": 0.06376624815374939, "learning_rate": 0.00019586951805399008, "loss": 0.8654, "step": 31450 }, { "epoch": 0.5523271124844186, "grad_norm": 0.07172595595444321, "learning_rate": 0.00019586633217953897, "loss": 0.8539, "step": 31460 }, { "epoch": 0.55250267736442, "grad_norm": 0.06224971416214019, "learning_rate": 0.00019586314510311323, "loss": 0.8503, "step": 31470 }, { "epoch": 0.5526782422444214, "grad_norm": 0.060967924833527316, "learning_rate": 0.0001958599568247533, "loss": 0.8563, "step": 31480 }, { "epoch": 0.5528538071244228, "grad_norm": 0.052385261010453606, "learning_rate": 0.00019585676734449952, "loss": 0.8516, "step": 31490 }, { "epoch": 0.5530293720044243, "grad_norm": 0.05416649705429727, "learning_rate": 0.00019585357666239233, "loss": 0.8473, "step": 31500 }, { "epoch": 0.5532049368844256, "grad_norm": 0.04253422872447094, "learning_rate": 0.0001958503847784721, "loss": 0.8617, "step": 31510 }, { "epoch": 0.553380501764427, "grad_norm": 0.060460168502510465, "learning_rate": 0.00019584719169277935, "loss": 0.854, "step": 31520 }, { "epoch": 0.5535560666444285, "grad_norm": 0.0620545493050864, "learning_rate": 0.0001958439974053545, "loss": 0.8445, "step": 31530 }, { "epoch": 0.5537316315244298, "grad_norm": 0.058353293539114066, "learning_rate": 0.00019584080191623805, "loss": 0.8569, "step": 31540 }, { "epoch": 0.5539071964044313, "grad_norm": 0.05856229996948684, "learning_rate": 0.0001958376052254704, "loss": 0.8616, "step": 31550 }, { "epoch": 0.5540827612844327, "grad_norm": 0.05200711375220617, "learning_rate": 0.00019583440733309218, "loss": 0.8587, "step": 31560 }, { "epoch": 0.554258326164434, "grad_norm": 0.06762062500426555, "learning_rate": 0.00019583120823914383, "loss": 0.8613, "step": 31570 }, { "epoch": 0.5544338910444355, "grad_norm": 0.05762786111197334, "learning_rate": 0.0001958280079436659, "loss": 0.8658, "step": 31580 }, { "epoch": 0.5546094559244369, "grad_norm": 0.08732367330497617, "learning_rate": 0.0001958248064466989, "loss": 0.8621, "step": 31590 }, { "epoch": 0.5547850208044383, "grad_norm": 0.059696370308523546, "learning_rate": 0.00019582160374828343, "loss": 0.8555, "step": 31600 }, { "epoch": 0.5549605856844397, "grad_norm": 0.0560633481066052, "learning_rate": 0.00019581839984846014, "loss": 0.8561, "step": 31610 }, { "epoch": 0.5551361505644411, "grad_norm": 0.1431558658988991, "learning_rate": 0.00019581519474726948, "loss": 0.8601, "step": 31620 }, { "epoch": 0.5553117154444425, "grad_norm": 0.07375584953127019, "learning_rate": 0.00019581198844475218, "loss": 0.8558, "step": 31630 }, { "epoch": 0.5554872803244439, "grad_norm": 0.05725049858196563, "learning_rate": 0.00019580878094094877, "loss": 0.8549, "step": 31640 }, { "epoch": 0.5556628452044453, "grad_norm": 0.0882034299725775, "learning_rate": 0.00019580557223589995, "loss": 0.8577, "step": 31650 }, { "epoch": 0.5558384100844467, "grad_norm": 0.07256546527978786, "learning_rate": 0.00019580236232964637, "loss": 0.8492, "step": 31660 }, { "epoch": 0.5560139749644482, "grad_norm": 0.06413020605031637, "learning_rate": 0.00019579915122222867, "loss": 0.8546, "step": 31670 }, { "epoch": 0.5561895398444495, "grad_norm": 0.07044355118818768, "learning_rate": 0.0001957959389136876, "loss": 0.8568, "step": 31680 }, { "epoch": 0.5563651047244509, "grad_norm": 0.06980349348485593, "learning_rate": 0.00019579272540406377, "loss": 0.8645, "step": 31690 }, { "epoch": 0.5565406696044524, "grad_norm": 0.058262753208645726, "learning_rate": 0.00019578951069339798, "loss": 0.861, "step": 31700 }, { "epoch": 0.5567162344844537, "grad_norm": 0.08796901541864095, "learning_rate": 0.00019578629478173088, "loss": 0.8609, "step": 31710 }, { "epoch": 0.5568917993644551, "grad_norm": 0.06268754264687475, "learning_rate": 0.0001957830776691033, "loss": 0.8647, "step": 31720 }, { "epoch": 0.5570673642444566, "grad_norm": 0.07622015059842348, "learning_rate": 0.0001957798593555559, "loss": 0.8478, "step": 31730 }, { "epoch": 0.5572429291244579, "grad_norm": 0.08102497416470163, "learning_rate": 0.0001957766398411296, "loss": 0.8481, "step": 31740 }, { "epoch": 0.5574184940044593, "grad_norm": 0.06949774285406789, "learning_rate": 0.0001957734191258651, "loss": 0.8587, "step": 31750 }, { "epoch": 0.5575940588844608, "grad_norm": 0.08512747486995045, "learning_rate": 0.00019577019720980315, "loss": 0.8451, "step": 31760 }, { "epoch": 0.5577696237644622, "grad_norm": 0.08706121732980912, "learning_rate": 0.00019576697409298468, "loss": 0.8507, "step": 31770 }, { "epoch": 0.5579451886444635, "grad_norm": 0.06143680051100377, "learning_rate": 0.0001957637497754505, "loss": 0.8573, "step": 31780 }, { "epoch": 0.558120753524465, "grad_norm": 0.054589618921446016, "learning_rate": 0.00019576052425724142, "loss": 0.856, "step": 31790 }, { "epoch": 0.5582963184044664, "grad_norm": 0.08196412932858765, "learning_rate": 0.00019575729753839834, "loss": 0.8513, "step": 31800 }, { "epoch": 0.5584718832844677, "grad_norm": 0.0541628824593687, "learning_rate": 0.00019575406961896215, "loss": 0.8523, "step": 31810 }, { "epoch": 0.5586474481644692, "grad_norm": 0.0698271835997056, "learning_rate": 0.00019575084049897374, "loss": 0.8574, "step": 31820 }, { "epoch": 0.5588230130444706, "grad_norm": 0.06193716612672238, "learning_rate": 0.000195747610178474, "loss": 0.8515, "step": 31830 }, { "epoch": 0.5589985779244719, "grad_norm": 0.08167504284755635, "learning_rate": 0.0001957443786575039, "loss": 0.855, "step": 31840 }, { "epoch": 0.5591741428044734, "grad_norm": 0.0642995610333742, "learning_rate": 0.00019574114593610438, "loss": 0.8605, "step": 31850 }, { "epoch": 0.5593497076844748, "grad_norm": 0.06712826869952879, "learning_rate": 0.00019573791201431636, "loss": 0.8557, "step": 31860 }, { "epoch": 0.5595252725644762, "grad_norm": 0.06389846978137712, "learning_rate": 0.00019573467689218088, "loss": 0.8542, "step": 31870 }, { "epoch": 0.5597008374444776, "grad_norm": 0.06442824021700329, "learning_rate": 0.00019573144056973885, "loss": 0.8557, "step": 31880 }, { "epoch": 0.559876402324479, "grad_norm": 0.058089365076181794, "learning_rate": 0.00019572820304703133, "loss": 0.8496, "step": 31890 }, { "epoch": 0.5600519672044804, "grad_norm": 0.08149158635271352, "learning_rate": 0.00019572496432409926, "loss": 0.8615, "step": 31900 }, { "epoch": 0.5602275320844818, "grad_norm": 0.07666930655525285, "learning_rate": 0.00019572172440098382, "loss": 0.8579, "step": 31910 }, { "epoch": 0.5604030969644832, "grad_norm": 0.10159118756022271, "learning_rate": 0.00019571848327772591, "loss": 0.858, "step": 31920 }, { "epoch": 0.5605786618444847, "grad_norm": 0.08841384980410794, "learning_rate": 0.00019571524095436674, "loss": 0.8604, "step": 31930 }, { "epoch": 0.560754226724486, "grad_norm": 0.06606160188179354, "learning_rate": 0.00019571199743094727, "loss": 0.8524, "step": 31940 }, { "epoch": 0.5609297916044874, "grad_norm": 0.09045001375640095, "learning_rate": 0.00019570875270750863, "loss": 0.8483, "step": 31950 }, { "epoch": 0.5611053564844889, "grad_norm": 0.06985870669150247, "learning_rate": 0.00019570550678409197, "loss": 0.8552, "step": 31960 }, { "epoch": 0.5612809213644903, "grad_norm": 0.05399414284616059, "learning_rate": 0.00019570225966073835, "loss": 0.8584, "step": 31970 }, { "epoch": 0.5614564862444916, "grad_norm": 0.05778512582234772, "learning_rate": 0.000195699011337489, "loss": 0.8516, "step": 31980 }, { "epoch": 0.5616320511244931, "grad_norm": 0.06241525841741541, "learning_rate": 0.000195695761814385, "loss": 0.855, "step": 31990 }, { "epoch": 0.5618076160044945, "grad_norm": 0.10594391546805544, "learning_rate": 0.00019569251109146756, "loss": 0.857, "step": 32000 }, { "epoch": 0.5619831808844958, "grad_norm": 0.09370594884544348, "learning_rate": 0.00019568925916877786, "loss": 0.8493, "step": 32010 }, { "epoch": 0.5621587457644973, "grad_norm": 0.07148149568791162, "learning_rate": 0.00019568600604635708, "loss": 0.8589, "step": 32020 }, { "epoch": 0.5623343106444987, "grad_norm": 0.05994117207920045, "learning_rate": 0.00019568275172424646, "loss": 0.8563, "step": 32030 }, { "epoch": 0.5625098755245, "grad_norm": 0.06913380368814628, "learning_rate": 0.00019567949620248726, "loss": 0.8669, "step": 32040 }, { "epoch": 0.5626854404045015, "grad_norm": 0.06356107213516923, "learning_rate": 0.00019567623948112066, "loss": 0.8581, "step": 32050 }, { "epoch": 0.5628610052845029, "grad_norm": 0.06112409139027663, "learning_rate": 0.000195672981560188, "loss": 0.8532, "step": 32060 }, { "epoch": 0.5630365701645043, "grad_norm": 0.05747512904435455, "learning_rate": 0.00019566972243973047, "loss": 0.8546, "step": 32070 }, { "epoch": 0.5632121350445057, "grad_norm": 0.1052427911599045, "learning_rate": 0.00019566646211978945, "loss": 0.8503, "step": 32080 }, { "epoch": 0.5633876999245071, "grad_norm": 0.1009751274224381, "learning_rate": 0.0001956632006004062, "loss": 0.8578, "step": 32090 }, { "epoch": 0.5635632648045085, "grad_norm": 0.054740770294274876, "learning_rate": 0.00019565993788162207, "loss": 0.8561, "step": 32100 }, { "epoch": 0.56373882968451, "grad_norm": 0.06933507814274649, "learning_rate": 0.00019565667396347837, "loss": 0.8556, "step": 32110 }, { "epoch": 0.5639143945645113, "grad_norm": 0.06608990849334938, "learning_rate": 0.00019565340884601645, "loss": 0.8588, "step": 32120 }, { "epoch": 0.5640899594445127, "grad_norm": 0.0725117537252476, "learning_rate": 0.00019565014252927775, "loss": 0.8547, "step": 32130 }, { "epoch": 0.5642655243245142, "grad_norm": 0.07125325370309396, "learning_rate": 0.00019564687501330357, "loss": 0.8505, "step": 32140 }, { "epoch": 0.5644410892045155, "grad_norm": 0.09132015742005194, "learning_rate": 0.00019564360629813535, "loss": 0.8448, "step": 32150 }, { "epoch": 0.5646166540845169, "grad_norm": 0.06305975153096847, "learning_rate": 0.0001956403363838145, "loss": 0.8565, "step": 32160 }, { "epoch": 0.5647922189645184, "grad_norm": 0.06922877442031199, "learning_rate": 0.00019563706527038247, "loss": 0.8493, "step": 32170 }, { "epoch": 0.5649677838445197, "grad_norm": 0.05361829627271556, "learning_rate": 0.00019563379295788063, "loss": 0.851, "step": 32180 }, { "epoch": 0.5651433487245211, "grad_norm": 0.07667918495855176, "learning_rate": 0.00019563051944635057, "loss": 0.8575, "step": 32190 }, { "epoch": 0.5653189136045226, "grad_norm": 0.05309647160987069, "learning_rate": 0.00019562724473583365, "loss": 0.8518, "step": 32200 }, { "epoch": 0.5654944784845239, "grad_norm": 0.05995795900810519, "learning_rate": 0.00019562396882637142, "loss": 0.8519, "step": 32210 }, { "epoch": 0.5656700433645253, "grad_norm": 0.06778232524308747, "learning_rate": 0.00019562069171800536, "loss": 0.8459, "step": 32220 }, { "epoch": 0.5658456082445268, "grad_norm": 0.10834944396195287, "learning_rate": 0.000195617413410777, "loss": 0.8448, "step": 32230 }, { "epoch": 0.5660211731245282, "grad_norm": 0.06556827610495528, "learning_rate": 0.0001956141339047279, "loss": 0.8515, "step": 32240 }, { "epoch": 0.5661967380045295, "grad_norm": 0.09547206963110622, "learning_rate": 0.00019561085319989954, "loss": 0.8509, "step": 32250 }, { "epoch": 0.566372302884531, "grad_norm": 0.056858823761605094, "learning_rate": 0.00019560757129633358, "loss": 0.8582, "step": 32260 }, { "epoch": 0.5665478677645324, "grad_norm": 0.08110861930422189, "learning_rate": 0.00019560428819407157, "loss": 0.8537, "step": 32270 }, { "epoch": 0.5667234326445338, "grad_norm": 0.07549000548632909, "learning_rate": 0.00019560100389315506, "loss": 0.864, "step": 32280 }, { "epoch": 0.5668989975245352, "grad_norm": 0.058631737890289484, "learning_rate": 0.00019559771839362575, "loss": 0.8567, "step": 32290 }, { "epoch": 0.5670745624045366, "grad_norm": 0.071333860048227, "learning_rate": 0.00019559443169552516, "loss": 0.8561, "step": 32300 }, { "epoch": 0.567250127284538, "grad_norm": 0.06298551312018201, "learning_rate": 0.00019559114379889502, "loss": 0.8554, "step": 32310 }, { "epoch": 0.5674256921645394, "grad_norm": 0.050211034339646285, "learning_rate": 0.00019558785470377696, "loss": 0.8517, "step": 32320 }, { "epoch": 0.5676012570445408, "grad_norm": 0.10202404898623746, "learning_rate": 0.00019558456441021266, "loss": 0.8536, "step": 32330 }, { "epoch": 0.5677768219245423, "grad_norm": 0.05383635805431459, "learning_rate": 0.0001955812729182438, "loss": 0.8639, "step": 32340 }, { "epoch": 0.5679523868045436, "grad_norm": 0.09278525788411675, "learning_rate": 0.0001955779802279121, "loss": 0.8601, "step": 32350 }, { "epoch": 0.568127951684545, "grad_norm": 0.0697810754164993, "learning_rate": 0.00019557468633925922, "loss": 0.8554, "step": 32360 }, { "epoch": 0.5683035165645465, "grad_norm": 0.062017064714340125, "learning_rate": 0.00019557139125232697, "loss": 0.851, "step": 32370 }, { "epoch": 0.5684790814445478, "grad_norm": 0.05202628934185076, "learning_rate": 0.00019556809496715707, "loss": 0.8536, "step": 32380 }, { "epoch": 0.5686546463245492, "grad_norm": 0.07075653120874596, "learning_rate": 0.00019556479748379126, "loss": 0.8619, "step": 32390 }, { "epoch": 0.5688302112045507, "grad_norm": 0.06935967163033377, "learning_rate": 0.00019556149880227134, "loss": 0.8487, "step": 32400 }, { "epoch": 0.569005776084552, "grad_norm": 0.06541204725450121, "learning_rate": 0.00019555819892263916, "loss": 0.8576, "step": 32410 }, { "epoch": 0.5691813409645534, "grad_norm": 0.07174605139084908, "learning_rate": 0.00019555489784493644, "loss": 0.853, "step": 32420 }, { "epoch": 0.5693569058445549, "grad_norm": 0.06249874602560976, "learning_rate": 0.00019555159556920506, "loss": 0.8591, "step": 32430 }, { "epoch": 0.5695324707245563, "grad_norm": 0.06694068397331837, "learning_rate": 0.00019554829209548683, "loss": 0.8498, "step": 32440 }, { "epoch": 0.5697080356045576, "grad_norm": 0.06226893561156828, "learning_rate": 0.0001955449874238236, "loss": 0.8587, "step": 32450 }, { "epoch": 0.5698836004845591, "grad_norm": 0.08524853511409376, "learning_rate": 0.00019554168155425726, "loss": 0.8578, "step": 32460 }, { "epoch": 0.5700591653645605, "grad_norm": 0.05711813520034612, "learning_rate": 0.0001955383744868297, "loss": 0.8552, "step": 32470 }, { "epoch": 0.5702347302445618, "grad_norm": 0.06821456133069702, "learning_rate": 0.00019553506622158287, "loss": 0.8546, "step": 32480 }, { "epoch": 0.5704102951245633, "grad_norm": 0.0704241835836233, "learning_rate": 0.0001955317567585586, "loss": 0.8551, "step": 32490 }, { "epoch": 0.5705858600045647, "grad_norm": 0.050187446766258664, "learning_rate": 0.00019552844609779883, "loss": 0.8514, "step": 32500 }, { "epoch": 0.570761424884566, "grad_norm": 0.06157481285610321, "learning_rate": 0.00019552513423934557, "loss": 0.8566, "step": 32510 }, { "epoch": 0.5709369897645675, "grad_norm": 0.05869478772363935, "learning_rate": 0.00019552182118324075, "loss": 0.8522, "step": 32520 }, { "epoch": 0.5711125546445689, "grad_norm": 0.09613495052058811, "learning_rate": 0.00019551850692952626, "loss": 0.8543, "step": 32530 }, { "epoch": 0.5712881195245703, "grad_norm": 0.06834327998859761, "learning_rate": 0.00019551519147824426, "loss": 0.8601, "step": 32540 }, { "epoch": 0.5714636844045717, "grad_norm": 0.06322959783553499, "learning_rate": 0.0001955118748294366, "loss": 0.8614, "step": 32550 }, { "epoch": 0.5716392492845731, "grad_norm": 0.08449027209011815, "learning_rate": 0.00019550855698314542, "loss": 0.8527, "step": 32560 }, { "epoch": 0.5718148141645745, "grad_norm": 0.06708895108790885, "learning_rate": 0.00019550523793941273, "loss": 0.8573, "step": 32570 }, { "epoch": 0.571990379044576, "grad_norm": 0.056215528053975644, "learning_rate": 0.00019550191769828054, "loss": 0.8484, "step": 32580 }, { "epoch": 0.5721659439245773, "grad_norm": 0.06035692082036451, "learning_rate": 0.00019549859625979092, "loss": 0.8513, "step": 32590 }, { "epoch": 0.5723415088045787, "grad_norm": 0.06773662296722908, "learning_rate": 0.000195495273623986, "loss": 0.8559, "step": 32600 }, { "epoch": 0.5725170736845802, "grad_norm": 0.057970265147435716, "learning_rate": 0.00019549194979090784, "loss": 0.8572, "step": 32610 }, { "epoch": 0.5726926385645815, "grad_norm": 0.06149947485892703, "learning_rate": 0.0001954886247605986, "loss": 0.8629, "step": 32620 }, { "epoch": 0.5728682034445829, "grad_norm": 0.06418158817260643, "learning_rate": 0.0001954852985331003, "loss": 0.8541, "step": 32630 }, { "epoch": 0.5730437683245844, "grad_norm": 0.06820565420635122, "learning_rate": 0.00019548197110845524, "loss": 0.8548, "step": 32640 }, { "epoch": 0.5732193332045857, "grad_norm": 0.0850417782025672, "learning_rate": 0.00019547864248670545, "loss": 0.8585, "step": 32650 }, { "epoch": 0.5733948980845872, "grad_norm": 0.05512428362927731, "learning_rate": 0.00019547531266789317, "loss": 0.8652, "step": 32660 }, { "epoch": 0.5735704629645886, "grad_norm": 0.05663140257434919, "learning_rate": 0.00019547198165206057, "loss": 0.8535, "step": 32670 }, { "epoch": 0.57374602784459, "grad_norm": 0.045479998677152536, "learning_rate": 0.00019546864943924987, "loss": 0.8523, "step": 32680 }, { "epoch": 0.5739215927245914, "grad_norm": 0.056360798377025616, "learning_rate": 0.00019546531602950324, "loss": 0.8536, "step": 32690 }, { "epoch": 0.5740971576045928, "grad_norm": 0.06728035242933054, "learning_rate": 0.000195461981422863, "loss": 0.8612, "step": 32700 }, { "epoch": 0.5742727224845942, "grad_norm": 0.05527212856242969, "learning_rate": 0.0001954586456193713, "loss": 0.8554, "step": 32710 }, { "epoch": 0.5744482873645956, "grad_norm": 0.063170391597777, "learning_rate": 0.00019545530861907052, "loss": 0.8544, "step": 32720 }, { "epoch": 0.574623852244597, "grad_norm": 0.06047200528152274, "learning_rate": 0.00019545197042200284, "loss": 0.8587, "step": 32730 }, { "epoch": 0.5747994171245984, "grad_norm": 0.07210847914944883, "learning_rate": 0.00019544863102821062, "loss": 0.8529, "step": 32740 }, { "epoch": 0.5749749820045998, "grad_norm": 0.08803498701511522, "learning_rate": 0.0001954452904377361, "loss": 0.8577, "step": 32750 }, { "epoch": 0.5751505468846012, "grad_norm": 0.06343911457050976, "learning_rate": 0.00019544194865062165, "loss": 0.8572, "step": 32760 }, { "epoch": 0.5753261117646026, "grad_norm": 0.06144494067582817, "learning_rate": 0.00019543860566690965, "loss": 0.8538, "step": 32770 }, { "epoch": 0.575501676644604, "grad_norm": 0.05758203604263062, "learning_rate": 0.00019543526148664239, "loss": 0.8615, "step": 32780 }, { "epoch": 0.5756772415246054, "grad_norm": 0.07149944263812592, "learning_rate": 0.00019543191610986227, "loss": 0.8573, "step": 32790 }, { "epoch": 0.5758528064046068, "grad_norm": 0.07822340244045609, "learning_rate": 0.00019542856953661168, "loss": 0.8517, "step": 32800 }, { "epoch": 0.5760283712846083, "grad_norm": 0.06618174241528814, "learning_rate": 0.00019542522176693304, "loss": 0.857, "step": 32810 }, { "epoch": 0.5762039361646096, "grad_norm": 0.06891477095979336, "learning_rate": 0.0001954218728008687, "loss": 0.8531, "step": 32820 }, { "epoch": 0.576379501044611, "grad_norm": 0.07120260716374313, "learning_rate": 0.00019541852263846115, "loss": 0.8529, "step": 32830 }, { "epoch": 0.5765550659246125, "grad_norm": 0.0725433459095679, "learning_rate": 0.00019541517127975286, "loss": 0.8537, "step": 32840 }, { "epoch": 0.5767306308046138, "grad_norm": 0.06777706835747493, "learning_rate": 0.0001954118187247862, "loss": 0.8623, "step": 32850 }, { "epoch": 0.5769061956846152, "grad_norm": 0.05555952558711061, "learning_rate": 0.00019540846497360375, "loss": 0.8467, "step": 32860 }, { "epoch": 0.5770817605646167, "grad_norm": 0.07965141384146267, "learning_rate": 0.0001954051100262479, "loss": 0.8507, "step": 32870 }, { "epoch": 0.577257325444618, "grad_norm": 0.05288075455877257, "learning_rate": 0.00019540175388276128, "loss": 0.8501, "step": 32880 }, { "epoch": 0.5774328903246194, "grad_norm": 0.058908304935020514, "learning_rate": 0.0001953983965431863, "loss": 0.842, "step": 32890 }, { "epoch": 0.5776084552046209, "grad_norm": 0.05158073567780164, "learning_rate": 0.00019539503800756558, "loss": 0.8532, "step": 32900 }, { "epoch": 0.5777840200846223, "grad_norm": 0.06619999891439178, "learning_rate": 0.0001953916782759416, "loss": 0.861, "step": 32910 }, { "epoch": 0.5779595849646236, "grad_norm": 0.0658944623556457, "learning_rate": 0.000195388317348357, "loss": 0.8575, "step": 32920 }, { "epoch": 0.5781351498446251, "grad_norm": 0.06149888042412686, "learning_rate": 0.0001953849552248543, "loss": 0.851, "step": 32930 }, { "epoch": 0.5783107147246265, "grad_norm": 0.049793700414776314, "learning_rate": 0.00019538159190547614, "loss": 0.856, "step": 32940 }, { "epoch": 0.5784862796046278, "grad_norm": 0.07549503528031541, "learning_rate": 0.00019537822739026516, "loss": 0.8569, "step": 32950 }, { "epoch": 0.5786618444846293, "grad_norm": 0.05699737536255877, "learning_rate": 0.0001953748616792639, "loss": 0.8556, "step": 32960 }, { "epoch": 0.5788374093646307, "grad_norm": 0.0594727773713429, "learning_rate": 0.00019537149477251504, "loss": 0.8487, "step": 32970 }, { "epoch": 0.579012974244632, "grad_norm": 0.07689454575683302, "learning_rate": 0.00019536812667006128, "loss": 0.8558, "step": 32980 }, { "epoch": 0.5791885391246335, "grad_norm": 0.05671456229033563, "learning_rate": 0.00019536475737194528, "loss": 0.858, "step": 32990 }, { "epoch": 0.5793641040046349, "grad_norm": 0.05469247374942836, "learning_rate": 0.00019536138687820974, "loss": 0.8517, "step": 33000 }, { "epoch": 0.5795396688846364, "grad_norm": 0.07712834583253583, "learning_rate": 0.00019535801518889728, "loss": 0.8522, "step": 33010 }, { "epoch": 0.5797152337646377, "grad_norm": 0.06989569983795088, "learning_rate": 0.00019535464230405073, "loss": 0.8561, "step": 33020 }, { "epoch": 0.5798907986446391, "grad_norm": 0.07395283590236464, "learning_rate": 0.00019535126822371277, "loss": 0.8567, "step": 33030 }, { "epoch": 0.5800663635246406, "grad_norm": 0.06999036792271435, "learning_rate": 0.00019534789294792617, "loss": 0.8573, "step": 33040 }, { "epoch": 0.580241928404642, "grad_norm": 0.09420814162812476, "learning_rate": 0.00019534451647673366, "loss": 0.8517, "step": 33050 }, { "epoch": 0.5804174932846433, "grad_norm": 0.07120906120907611, "learning_rate": 0.00019534113881017809, "loss": 0.8592, "step": 33060 }, { "epoch": 0.5805930581646448, "grad_norm": 0.06391727084741361, "learning_rate": 0.00019533775994830217, "loss": 0.8515, "step": 33070 }, { "epoch": 0.5807686230446462, "grad_norm": 0.07043152272685764, "learning_rate": 0.00019533437989114875, "loss": 0.8566, "step": 33080 }, { "epoch": 0.5809441879246475, "grad_norm": 0.0614115125025493, "learning_rate": 0.0001953309986387607, "loss": 0.8485, "step": 33090 }, { "epoch": 0.581119752804649, "grad_norm": 0.06917892675822503, "learning_rate": 0.00019532761619118076, "loss": 0.85, "step": 33100 }, { "epoch": 0.5812953176846504, "grad_norm": 0.054761103618492304, "learning_rate": 0.00019532423254845188, "loss": 0.8577, "step": 33110 }, { "epoch": 0.5814708825646517, "grad_norm": 0.0681351329419022, "learning_rate": 0.0001953208477106169, "loss": 0.8587, "step": 33120 }, { "epoch": 0.5816464474446532, "grad_norm": 0.0706103397974869, "learning_rate": 0.00019531746167771868, "loss": 0.8648, "step": 33130 }, { "epoch": 0.5818220123246546, "grad_norm": 0.06694398238092401, "learning_rate": 0.00019531407444980017, "loss": 0.8509, "step": 33140 }, { "epoch": 0.581997577204656, "grad_norm": 0.05520543034692205, "learning_rate": 0.00019531068602690426, "loss": 0.8524, "step": 33150 }, { "epoch": 0.5821731420846574, "grad_norm": 0.0887314141279773, "learning_rate": 0.00019530729640907386, "loss": 0.8511, "step": 33160 }, { "epoch": 0.5823487069646588, "grad_norm": 0.06860135272727876, "learning_rate": 0.00019530390559635195, "loss": 0.8531, "step": 33170 }, { "epoch": 0.5825242718446602, "grad_norm": 0.06795289711491338, "learning_rate": 0.00019530051358878148, "loss": 0.8531, "step": 33180 }, { "epoch": 0.5826998367246616, "grad_norm": 0.07784408291646687, "learning_rate": 0.00019529712038640545, "loss": 0.8537, "step": 33190 }, { "epoch": 0.582875401604663, "grad_norm": 0.07968604495453765, "learning_rate": 0.0001952937259892668, "loss": 0.8538, "step": 33200 }, { "epoch": 0.5830509664846644, "grad_norm": 0.05766934665639535, "learning_rate": 0.0001952903303974086, "loss": 0.8729, "step": 33210 }, { "epoch": 0.5832265313646658, "grad_norm": 0.06313669813406546, "learning_rate": 0.00019528693361087383, "loss": 0.8596, "step": 33220 }, { "epoch": 0.5834020962446672, "grad_norm": 0.09774903074276924, "learning_rate": 0.00019528353562970555, "loss": 0.8574, "step": 33230 }, { "epoch": 0.5835776611246686, "grad_norm": 0.07867385189941656, "learning_rate": 0.0001952801364539468, "loss": 0.859, "step": 33240 }, { "epoch": 0.5837532260046701, "grad_norm": 0.06524542970636586, "learning_rate": 0.00019527673608364063, "loss": 0.8603, "step": 33250 }, { "epoch": 0.5839287908846714, "grad_norm": 0.05392323491940685, "learning_rate": 0.00019527333451883018, "loss": 0.8545, "step": 33260 }, { "epoch": 0.5841043557646728, "grad_norm": 0.061512778826081665, "learning_rate": 0.0001952699317595585, "loss": 0.8564, "step": 33270 }, { "epoch": 0.5842799206446743, "grad_norm": 0.07206003041884652, "learning_rate": 0.0001952665278058687, "loss": 0.8509, "step": 33280 }, { "epoch": 0.5844554855246756, "grad_norm": 0.06589693294560058, "learning_rate": 0.00019526312265780396, "loss": 0.8469, "step": 33290 }, { "epoch": 0.584631050404677, "grad_norm": 0.09184637925104366, "learning_rate": 0.0001952597163154074, "loss": 0.8552, "step": 33300 }, { "epoch": 0.5848066152846785, "grad_norm": 0.06652555081692095, "learning_rate": 0.00019525630877872214, "loss": 0.8542, "step": 33310 }, { "epoch": 0.5849821801646798, "grad_norm": 0.08885322685765253, "learning_rate": 0.00019525290004779138, "loss": 0.8506, "step": 33320 }, { "epoch": 0.5851577450446812, "grad_norm": 0.07731486012239168, "learning_rate": 0.00019524949012265832, "loss": 0.855, "step": 33330 }, { "epoch": 0.5853333099246827, "grad_norm": 0.05903663132414971, "learning_rate": 0.00019524607900336615, "loss": 0.8525, "step": 33340 }, { "epoch": 0.585508874804684, "grad_norm": 0.08349084797630148, "learning_rate": 0.0001952426666899581, "loss": 0.8507, "step": 33350 }, { "epoch": 0.5856844396846854, "grad_norm": 0.06649268638869657, "learning_rate": 0.00019523925318247742, "loss": 0.8565, "step": 33360 }, { "epoch": 0.5858600045646869, "grad_norm": 0.08653840606044026, "learning_rate": 0.0001952358384809673, "loss": 0.8552, "step": 33370 }, { "epoch": 0.5860355694446883, "grad_norm": 0.05117611212437168, "learning_rate": 0.0001952324225854711, "loss": 0.857, "step": 33380 }, { "epoch": 0.5862111343246897, "grad_norm": 0.05208601775011767, "learning_rate": 0.000195229005496032, "loss": 0.8547, "step": 33390 }, { "epoch": 0.5863866992046911, "grad_norm": 0.0699694107702731, "learning_rate": 0.00019522558721269342, "loss": 0.8538, "step": 33400 }, { "epoch": 0.5865622640846925, "grad_norm": 0.05411439753909007, "learning_rate": 0.0001952221677354985, "loss": 0.8567, "step": 33410 }, { "epoch": 0.586737828964694, "grad_norm": 0.07012237501175037, "learning_rate": 0.00019521874706449071, "loss": 0.8561, "step": 33420 }, { "epoch": 0.5869133938446953, "grad_norm": 0.08970227560560895, "learning_rate": 0.00019521532519971332, "loss": 0.8596, "step": 33430 }, { "epoch": 0.5870889587246967, "grad_norm": 0.07390837647039934, "learning_rate": 0.0001952119021412097, "loss": 0.8556, "step": 33440 }, { "epoch": 0.5872645236046982, "grad_norm": 0.06846710226296572, "learning_rate": 0.00019520847788902324, "loss": 0.8541, "step": 33450 }, { "epoch": 0.5874400884846995, "grad_norm": 0.08030473220427722, "learning_rate": 0.00019520505244319727, "loss": 0.8516, "step": 33460 }, { "epoch": 0.5876156533647009, "grad_norm": 0.06515571055005846, "learning_rate": 0.00019520162580377527, "loss": 0.854, "step": 33470 }, { "epoch": 0.5877912182447024, "grad_norm": 0.06341075296202849, "learning_rate": 0.00019519819797080058, "loss": 0.8563, "step": 33480 }, { "epoch": 0.5879667831247037, "grad_norm": 0.056854140427527455, "learning_rate": 0.0001951947689443167, "loss": 0.8563, "step": 33490 }, { "epoch": 0.5881423480047051, "grad_norm": 0.06403078479278276, "learning_rate": 0.00019519133872436705, "loss": 0.8647, "step": 33500 }, { "epoch": 0.5883179128847066, "grad_norm": 0.10009494403915625, "learning_rate": 0.0001951879073109951, "loss": 0.8536, "step": 33510 }, { "epoch": 0.588493477764708, "grad_norm": 0.0791051588423064, "learning_rate": 0.00019518447470424428, "loss": 0.8508, "step": 33520 }, { "epoch": 0.5886690426447093, "grad_norm": 0.07866982254835435, "learning_rate": 0.00019518104090415813, "loss": 0.8436, "step": 33530 }, { "epoch": 0.5888446075247108, "grad_norm": 0.061089543174193836, "learning_rate": 0.00019517760591078015, "loss": 0.8593, "step": 33540 }, { "epoch": 0.5890201724047122, "grad_norm": 0.13594521794257305, "learning_rate": 0.0001951741697241538, "loss": 0.865, "step": 33550 }, { "epoch": 0.5891957372847135, "grad_norm": 0.07158132895896371, "learning_rate": 0.00019517073234432273, "loss": 0.8536, "step": 33560 }, { "epoch": 0.589371302164715, "grad_norm": 0.12006549550629925, "learning_rate": 0.00019516729377133042, "loss": 0.8525, "step": 33570 }, { "epoch": 0.5895468670447164, "grad_norm": 0.06425124151914942, "learning_rate": 0.00019516385400522042, "loss": 0.8572, "step": 33580 }, { "epoch": 0.5897224319247177, "grad_norm": 0.06686111936501374, "learning_rate": 0.0001951604130460364, "loss": 0.8518, "step": 33590 }, { "epoch": 0.5898979968047192, "grad_norm": 0.060975112248772605, "learning_rate": 0.00019515697089382187, "loss": 0.8477, "step": 33600 }, { "epoch": 0.5900735616847206, "grad_norm": 0.05049322131511545, "learning_rate": 0.00019515352754862045, "loss": 0.8588, "step": 33610 }, { "epoch": 0.590249126564722, "grad_norm": 0.10198541671936864, "learning_rate": 0.00019515008301047583, "loss": 0.8526, "step": 33620 }, { "epoch": 0.5904246914447234, "grad_norm": 0.0712410478804826, "learning_rate": 0.00019514663727943158, "loss": 0.8554, "step": 33630 }, { "epoch": 0.5906002563247248, "grad_norm": 0.06458814114066187, "learning_rate": 0.00019514319035553144, "loss": 0.8482, "step": 33640 }, { "epoch": 0.5907758212047262, "grad_norm": 0.053621302605691055, "learning_rate": 0.00019513974223881898, "loss": 0.8471, "step": 33650 }, { "epoch": 0.5909513860847276, "grad_norm": 0.054680010622272554, "learning_rate": 0.00019513629292933798, "loss": 0.8526, "step": 33660 }, { "epoch": 0.591126950964729, "grad_norm": 0.06723672244578614, "learning_rate": 0.0001951328424271321, "loss": 0.8533, "step": 33670 }, { "epoch": 0.5913025158447304, "grad_norm": 0.06361498561948252, "learning_rate": 0.00019512939073224508, "loss": 0.859, "step": 33680 }, { "epoch": 0.5914780807247318, "grad_norm": 0.06683229989138366, "learning_rate": 0.00019512593784472065, "loss": 0.8536, "step": 33690 }, { "epoch": 0.5916536456047332, "grad_norm": 0.06050614450745206, "learning_rate": 0.0001951224837646025, "loss": 0.8588, "step": 33700 }, { "epoch": 0.5918292104847346, "grad_norm": 0.05907782241003819, "learning_rate": 0.00019511902849193448, "loss": 0.8536, "step": 33710 }, { "epoch": 0.5920047753647361, "grad_norm": 0.09188242198720971, "learning_rate": 0.00019511557202676031, "loss": 0.8516, "step": 33720 }, { "epoch": 0.5921803402447374, "grad_norm": 0.06739650667573739, "learning_rate": 0.0001951121143691238, "loss": 0.8562, "step": 33730 }, { "epoch": 0.5923559051247389, "grad_norm": 0.07094022648851389, "learning_rate": 0.0001951086555190688, "loss": 0.8586, "step": 33740 }, { "epoch": 0.5925314700047403, "grad_norm": 0.0679746640367729, "learning_rate": 0.00019510519547663908, "loss": 0.8556, "step": 33750 }, { "epoch": 0.5927070348847416, "grad_norm": 0.10123201053272536, "learning_rate": 0.0001951017342418785, "loss": 0.8537, "step": 33760 }, { "epoch": 0.5928825997647431, "grad_norm": 0.055839758649634655, "learning_rate": 0.0001950982718148309, "loss": 0.8571, "step": 33770 }, { "epoch": 0.5930581646447445, "grad_norm": 0.05432636230090095, "learning_rate": 0.00019509480819554013, "loss": 0.8586, "step": 33780 }, { "epoch": 0.5932337295247458, "grad_norm": 0.07366911562454, "learning_rate": 0.00019509134338405016, "loss": 0.8603, "step": 33790 }, { "epoch": 0.5934092944047473, "grad_norm": 0.06329128840980038, "learning_rate": 0.00019508787738040483, "loss": 0.8532, "step": 33800 }, { "epoch": 0.5935848592847487, "grad_norm": 0.06276336833351032, "learning_rate": 0.00019508441018464804, "loss": 0.8552, "step": 33810 }, { "epoch": 0.5937604241647501, "grad_norm": 0.08763719514226466, "learning_rate": 0.00019508094179682378, "loss": 0.8585, "step": 33820 }, { "epoch": 0.5939359890447515, "grad_norm": 0.08174460464418325, "learning_rate": 0.00019507747221697593, "loss": 0.8509, "step": 33830 }, { "epoch": 0.5941115539247529, "grad_norm": 0.06506027588637346, "learning_rate": 0.00019507400144514847, "loss": 0.8512, "step": 33840 }, { "epoch": 0.5942871188047543, "grad_norm": 0.05887661347839093, "learning_rate": 0.0001950705294813854, "loss": 0.8639, "step": 33850 }, { "epoch": 0.5944626836847557, "grad_norm": 0.05840276904719976, "learning_rate": 0.0001950670563257307, "loss": 0.8622, "step": 33860 }, { "epoch": 0.5946382485647571, "grad_norm": 0.08659359691907355, "learning_rate": 0.00019506358197822834, "loss": 0.8523, "step": 33870 }, { "epoch": 0.5948138134447585, "grad_norm": 0.053869451163014524, "learning_rate": 0.0001950601064389224, "loss": 0.8549, "step": 33880 }, { "epoch": 0.59498937832476, "grad_norm": 0.06038405341912577, "learning_rate": 0.00019505662970785686, "loss": 0.8532, "step": 33890 }, { "epoch": 0.5951649432047613, "grad_norm": 0.0721505272296486, "learning_rate": 0.00019505315178507576, "loss": 0.855, "step": 33900 }, { "epoch": 0.5953405080847627, "grad_norm": 0.053230407062687564, "learning_rate": 0.00019504967267062329, "loss": 0.8615, "step": 33910 }, { "epoch": 0.5955160729647642, "grad_norm": 0.058657199640471905, "learning_rate": 0.0001950461923645434, "loss": 0.8556, "step": 33920 }, { "epoch": 0.5956916378447655, "grad_norm": 0.07727005047861021, "learning_rate": 0.0001950427108668802, "loss": 0.8525, "step": 33930 }, { "epoch": 0.5958672027247669, "grad_norm": 0.07068961915817634, "learning_rate": 0.00019503922817767788, "loss": 0.8509, "step": 33940 }, { "epoch": 0.5960427676047684, "grad_norm": 0.05916249988616056, "learning_rate": 0.00019503574429698047, "loss": 0.8534, "step": 33950 }, { "epoch": 0.5962183324847697, "grad_norm": 0.06955834830224931, "learning_rate": 0.0001950322592248322, "loss": 0.845, "step": 33960 }, { "epoch": 0.5963938973647711, "grad_norm": 0.05876858293524209, "learning_rate": 0.00019502877296127716, "loss": 0.8579, "step": 33970 }, { "epoch": 0.5965694622447726, "grad_norm": 0.059142508073981366, "learning_rate": 0.00019502528550635956, "loss": 0.85, "step": 33980 }, { "epoch": 0.596745027124774, "grad_norm": 0.05969014428855633, "learning_rate": 0.00019502179686012357, "loss": 0.8676, "step": 33990 }, { "epoch": 0.5969205920047753, "grad_norm": 0.06916823034422771, "learning_rate": 0.0001950183070226134, "loss": 0.8557, "step": 34000 }, { "epoch": 0.5970961568847768, "grad_norm": 0.0681463783297539, "learning_rate": 0.00019501481599387325, "loss": 0.8543, "step": 34010 }, { "epoch": 0.5972717217647782, "grad_norm": 0.06169019852425541, "learning_rate": 0.00019501132377394736, "loss": 0.8568, "step": 34020 }, { "epoch": 0.5974472866447795, "grad_norm": 0.0698389529854056, "learning_rate": 0.00019500783036287996, "loss": 0.8583, "step": 34030 }, { "epoch": 0.597622851524781, "grad_norm": 0.07269948440082466, "learning_rate": 0.0001950043357607154, "loss": 0.8546, "step": 34040 }, { "epoch": 0.5977984164047824, "grad_norm": 0.07001628449510446, "learning_rate": 0.00019500083996749782, "loss": 0.8509, "step": 34050 }, { "epoch": 0.5979739812847837, "grad_norm": 0.061539304354296116, "learning_rate": 0.0001949973429832716, "loss": 0.8533, "step": 34060 }, { "epoch": 0.5981495461647852, "grad_norm": 0.07275834089086519, "learning_rate": 0.00019499384480808104, "loss": 0.8548, "step": 34070 }, { "epoch": 0.5983251110447866, "grad_norm": 0.06378460335538848, "learning_rate": 0.00019499034544197047, "loss": 0.8463, "step": 34080 }, { "epoch": 0.598500675924788, "grad_norm": 0.06908130484288214, "learning_rate": 0.00019498684488498418, "loss": 0.852, "step": 34090 }, { "epoch": 0.5986762408047894, "grad_norm": 0.06901830723010856, "learning_rate": 0.00019498334313716658, "loss": 0.8589, "step": 34100 }, { "epoch": 0.5988518056847908, "grad_norm": 0.08159285468925763, "learning_rate": 0.000194979840198562, "loss": 0.8457, "step": 34110 }, { "epoch": 0.5990273705647923, "grad_norm": 0.06328393113993441, "learning_rate": 0.00019497633606921483, "loss": 0.8521, "step": 34120 }, { "epoch": 0.5992029354447936, "grad_norm": 0.0617059580951153, "learning_rate": 0.00019497283074916945, "loss": 0.8536, "step": 34130 }, { "epoch": 0.599378500324795, "grad_norm": 0.06899821000403349, "learning_rate": 0.00019496932423847035, "loss": 0.8571, "step": 34140 }, { "epoch": 0.5995540652047965, "grad_norm": 0.06574458310330941, "learning_rate": 0.00019496581653716185, "loss": 0.8575, "step": 34150 }, { "epoch": 0.5997296300847978, "grad_norm": 0.05704013541866481, "learning_rate": 0.00019496230764528844, "loss": 0.8548, "step": 34160 }, { "epoch": 0.5999051949647992, "grad_norm": 0.06801725699137037, "learning_rate": 0.00019495879756289465, "loss": 0.8568, "step": 34170 }, { "epoch": 0.6000807598448007, "grad_norm": 0.07012249983770487, "learning_rate": 0.00019495528629002483, "loss": 0.8572, "step": 34180 }, { "epoch": 0.6002563247248021, "grad_norm": 0.06961452937468617, "learning_rate": 0.00019495177382672355, "loss": 0.8478, "step": 34190 }, { "epoch": 0.6004318896048034, "grad_norm": 0.05723780463908684, "learning_rate": 0.0001949482601730353, "loss": 0.8548, "step": 34200 }, { "epoch": 0.6006074544848049, "grad_norm": 0.0664632457918821, "learning_rate": 0.00019494474532900452, "loss": 0.8587, "step": 34210 }, { "epoch": 0.6007830193648063, "grad_norm": 0.08746435703327587, "learning_rate": 0.0001949412292946759, "loss": 0.8571, "step": 34220 }, { "epoch": 0.6009585842448076, "grad_norm": 0.07415728197339716, "learning_rate": 0.00019493771207009384, "loss": 0.8505, "step": 34230 }, { "epoch": 0.6011341491248091, "grad_norm": 0.0645957367885992, "learning_rate": 0.000194934193655303, "loss": 0.8542, "step": 34240 }, { "epoch": 0.6013097140048105, "grad_norm": 0.0753753670283995, "learning_rate": 0.0001949306740503479, "loss": 0.8427, "step": 34250 }, { "epoch": 0.6014852788848118, "grad_norm": 0.06909887879824832, "learning_rate": 0.00019492715325527318, "loss": 0.8472, "step": 34260 }, { "epoch": 0.6016608437648133, "grad_norm": 0.06510616957342819, "learning_rate": 0.00019492363127012342, "loss": 0.8538, "step": 34270 }, { "epoch": 0.6018364086448147, "grad_norm": 0.07478147354247672, "learning_rate": 0.00019492010809494324, "loss": 0.8555, "step": 34280 }, { "epoch": 0.6020119735248161, "grad_norm": 0.1040143627505095, "learning_rate": 0.0001949165837297773, "loss": 0.8466, "step": 34290 }, { "epoch": 0.6021875384048175, "grad_norm": 0.08853087611619244, "learning_rate": 0.00019491305817467025, "loss": 0.8503, "step": 34300 }, { "epoch": 0.6023631032848189, "grad_norm": 0.07051582668870532, "learning_rate": 0.00019490953142966677, "loss": 0.8504, "step": 34310 }, { "epoch": 0.6025386681648203, "grad_norm": 0.06507810491914909, "learning_rate": 0.00019490600349481152, "loss": 0.8515, "step": 34320 }, { "epoch": 0.6027142330448217, "grad_norm": 0.05803637568335623, "learning_rate": 0.0001949024743701492, "loss": 0.8496, "step": 34330 }, { "epoch": 0.6028897979248231, "grad_norm": 0.08804326703234838, "learning_rate": 0.0001948989440557245, "loss": 0.8557, "step": 34340 }, { "epoch": 0.6030653628048245, "grad_norm": 0.07521450401318783, "learning_rate": 0.00019489541255158225, "loss": 0.8478, "step": 34350 }, { "epoch": 0.603240927684826, "grad_norm": 0.06362222379227477, "learning_rate": 0.00019489187985776706, "loss": 0.8495, "step": 34360 }, { "epoch": 0.6034164925648273, "grad_norm": 0.08576794885034267, "learning_rate": 0.00019488834597432378, "loss": 0.8545, "step": 34370 }, { "epoch": 0.6035920574448287, "grad_norm": 0.05403595388340057, "learning_rate": 0.00019488481090129721, "loss": 0.8478, "step": 34380 }, { "epoch": 0.6037676223248302, "grad_norm": 0.08444872432734009, "learning_rate": 0.00019488127463873202, "loss": 0.8528, "step": 34390 }, { "epoch": 0.6039431872048315, "grad_norm": 0.07713042900071777, "learning_rate": 0.00019487773718667313, "loss": 0.8421, "step": 34400 }, { "epoch": 0.6041187520848329, "grad_norm": 0.06253681330025504, "learning_rate": 0.00019487419854516533, "loss": 0.8569, "step": 34410 }, { "epoch": 0.6042943169648344, "grad_norm": 0.07541534542829878, "learning_rate": 0.00019487065871425343, "loss": 0.85, "step": 34420 }, { "epoch": 0.6044698818448357, "grad_norm": 0.06569872882552927, "learning_rate": 0.0001948671176939823, "loss": 0.8516, "step": 34430 }, { "epoch": 0.6046454467248371, "grad_norm": 0.05787874501281186, "learning_rate": 0.0001948635754843968, "loss": 0.8519, "step": 34440 }, { "epoch": 0.6048210116048386, "grad_norm": 0.07088968423592686, "learning_rate": 0.00019486003208554178, "loss": 0.8557, "step": 34450 }, { "epoch": 0.60499657648484, "grad_norm": 0.07937747612236312, "learning_rate": 0.0001948564874974622, "loss": 0.8539, "step": 34460 }, { "epoch": 0.6051721413648414, "grad_norm": 0.054789960025789536, "learning_rate": 0.0001948529417202029, "loss": 0.8558, "step": 34470 }, { "epoch": 0.6053477062448428, "grad_norm": 0.06191223901418255, "learning_rate": 0.00019484939475380888, "loss": 0.8563, "step": 34480 }, { "epoch": 0.6055232711248442, "grad_norm": 0.07394923377594598, "learning_rate": 0.00019484584659832502, "loss": 0.8464, "step": 34490 }, { "epoch": 0.6056988360048456, "grad_norm": 0.08377041515233934, "learning_rate": 0.00019484229725379631, "loss": 0.8532, "step": 34500 }, { "epoch": 0.605874400884847, "grad_norm": 0.08768644896356383, "learning_rate": 0.0001948387467202677, "loss": 0.8554, "step": 34510 }, { "epoch": 0.6060499657648484, "grad_norm": 0.09512010864008254, "learning_rate": 0.00019483519499778417, "loss": 0.8465, "step": 34520 }, { "epoch": 0.6062255306448499, "grad_norm": 0.05139461283749981, "learning_rate": 0.00019483164208639075, "loss": 0.8521, "step": 34530 }, { "epoch": 0.6064010955248512, "grad_norm": 0.07612331646637036, "learning_rate": 0.00019482808798613246, "loss": 0.8545, "step": 34540 }, { "epoch": 0.6065766604048526, "grad_norm": 0.08350886131162212, "learning_rate": 0.0001948245326970543, "loss": 0.8518, "step": 34550 }, { "epoch": 0.6067522252848541, "grad_norm": 0.08362189520514308, "learning_rate": 0.00019482097621920132, "loss": 0.855, "step": 34560 }, { "epoch": 0.6069277901648554, "grad_norm": 0.10478827675198307, "learning_rate": 0.0001948174185526186, "loss": 0.8552, "step": 34570 }, { "epoch": 0.6071033550448568, "grad_norm": 0.057904228374658286, "learning_rate": 0.0001948138596973512, "loss": 0.8462, "step": 34580 }, { "epoch": 0.6072789199248583, "grad_norm": 0.09665187461508506, "learning_rate": 0.00019481029965344422, "loss": 0.8619, "step": 34590 }, { "epoch": 0.6074544848048596, "grad_norm": 0.058454337765642655, "learning_rate": 0.00019480673842094278, "loss": 0.8562, "step": 34600 }, { "epoch": 0.607630049684861, "grad_norm": 0.07033780953395381, "learning_rate": 0.00019480317599989197, "loss": 0.8539, "step": 34610 }, { "epoch": 0.6078056145648625, "grad_norm": 0.07943137462688576, "learning_rate": 0.00019479961239033696, "loss": 0.856, "step": 34620 }, { "epoch": 0.6079811794448639, "grad_norm": 0.0707311613267164, "learning_rate": 0.00019479604759232283, "loss": 0.8555, "step": 34630 }, { "epoch": 0.6081567443248652, "grad_norm": 0.07987787880419214, "learning_rate": 0.00019479248160589487, "loss": 0.8531, "step": 34640 }, { "epoch": 0.6083323092048667, "grad_norm": 0.05356653201670992, "learning_rate": 0.00019478891443109816, "loss": 0.8574, "step": 34650 }, { "epoch": 0.6085078740848681, "grad_norm": 0.04843820949108811, "learning_rate": 0.00019478534606797796, "loss": 0.8537, "step": 34660 }, { "epoch": 0.6086834389648694, "grad_norm": 0.09059237229235718, "learning_rate": 0.0001947817765165794, "loss": 0.8569, "step": 34670 }, { "epoch": 0.6088590038448709, "grad_norm": 0.12780915295476045, "learning_rate": 0.00019477820577694778, "loss": 0.8624, "step": 34680 }, { "epoch": 0.6090345687248723, "grad_norm": 0.09966172911865735, "learning_rate": 0.00019477463384912834, "loss": 0.8524, "step": 34690 }, { "epoch": 0.6092101336048736, "grad_norm": 0.06560661431774106, "learning_rate": 0.0001947710607331663, "loss": 0.8469, "step": 34700 }, { "epoch": 0.6093856984848751, "grad_norm": 0.06268195356031811, "learning_rate": 0.00019476748642910694, "loss": 0.8544, "step": 34710 }, { "epoch": 0.6095612633648765, "grad_norm": 0.08819573541613727, "learning_rate": 0.00019476391093699557, "loss": 0.8509, "step": 34720 }, { "epoch": 0.6097368282448778, "grad_norm": 0.057939836604518694, "learning_rate": 0.00019476033425687748, "loss": 0.8565, "step": 34730 }, { "epoch": 0.6099123931248793, "grad_norm": 0.05143523506946456, "learning_rate": 0.000194756756388798, "loss": 0.8581, "step": 34740 }, { "epoch": 0.6100879580048807, "grad_norm": 0.06646143553453308, "learning_rate": 0.0001947531773328024, "loss": 0.8512, "step": 34750 }, { "epoch": 0.6102635228848821, "grad_norm": 0.08173241030253155, "learning_rate": 0.00019474959708893613, "loss": 0.8491, "step": 34760 }, { "epoch": 0.6104390877648835, "grad_norm": 0.055024221196336494, "learning_rate": 0.00019474601565724447, "loss": 0.857, "step": 34770 }, { "epoch": 0.6106146526448849, "grad_norm": 0.05243261163213456, "learning_rate": 0.00019474243303777282, "loss": 0.8516, "step": 34780 }, { "epoch": 0.6107902175248863, "grad_norm": 0.0636974240648765, "learning_rate": 0.00019473884923056658, "loss": 0.8612, "step": 34790 }, { "epoch": 0.6109657824048877, "grad_norm": 0.07280353527188747, "learning_rate": 0.00019473526423567116, "loss": 0.8549, "step": 34800 }, { "epoch": 0.6111413472848891, "grad_norm": 0.050835402437000354, "learning_rate": 0.000194731678053132, "loss": 0.8646, "step": 34810 }, { "epoch": 0.6113169121648905, "grad_norm": 0.06979680964693555, "learning_rate": 0.0001947280906829945, "loss": 0.85, "step": 34820 }, { "epoch": 0.611492477044892, "grad_norm": 0.052344970397456426, "learning_rate": 0.00019472450212530408, "loss": 0.8504, "step": 34830 }, { "epoch": 0.6116680419248933, "grad_norm": 0.05888793116942056, "learning_rate": 0.0001947209123801063, "loss": 0.8591, "step": 34840 }, { "epoch": 0.6118436068048948, "grad_norm": 0.06725615289430606, "learning_rate": 0.0001947173214474466, "loss": 0.8504, "step": 34850 }, { "epoch": 0.6120191716848962, "grad_norm": 0.10396665103145325, "learning_rate": 0.0001947137293273705, "loss": 0.8568, "step": 34860 }, { "epoch": 0.6121947365648975, "grad_norm": 0.05593176188823821, "learning_rate": 0.00019471013601992344, "loss": 0.8539, "step": 34870 }, { "epoch": 0.612370301444899, "grad_norm": 0.06518475816837115, "learning_rate": 0.00019470654152515103, "loss": 0.8526, "step": 34880 }, { "epoch": 0.6125458663249004, "grad_norm": 0.06985734451624626, "learning_rate": 0.0001947029458430988, "loss": 0.856, "step": 34890 }, { "epoch": 0.6127214312049017, "grad_norm": 0.0839693543407261, "learning_rate": 0.00019469934897381225, "loss": 0.8485, "step": 34900 }, { "epoch": 0.6128969960849032, "grad_norm": 0.05545623296106622, "learning_rate": 0.00019469575091733698, "loss": 0.8534, "step": 34910 }, { "epoch": 0.6130725609649046, "grad_norm": 0.0671854795455712, "learning_rate": 0.00019469215167371861, "loss": 0.8554, "step": 34920 }, { "epoch": 0.613248125844906, "grad_norm": 0.07516795658812246, "learning_rate": 0.00019468855124300273, "loss": 0.8539, "step": 34930 }, { "epoch": 0.6134236907249074, "grad_norm": 0.0657584907316818, "learning_rate": 0.00019468494962523496, "loss": 0.8533, "step": 34940 }, { "epoch": 0.6135992556049088, "grad_norm": 0.05710331700715575, "learning_rate": 0.00019468134682046087, "loss": 0.8542, "step": 34950 }, { "epoch": 0.6137748204849102, "grad_norm": 0.06165258854173056, "learning_rate": 0.00019467774282872622, "loss": 0.8501, "step": 34960 }, { "epoch": 0.6139503853649116, "grad_norm": 0.05925466068659386, "learning_rate": 0.0001946741376500766, "loss": 0.86, "step": 34970 }, { "epoch": 0.614125950244913, "grad_norm": 0.08297599360398715, "learning_rate": 0.0001946705312845577, "loss": 0.8483, "step": 34980 }, { "epoch": 0.6143015151249144, "grad_norm": 0.06848582810772601, "learning_rate": 0.0001946669237322152, "loss": 0.8524, "step": 34990 }, { "epoch": 0.6144770800049159, "grad_norm": 0.06415836359778315, "learning_rate": 0.00019466331499309484, "loss": 0.8539, "step": 35000 }, { "epoch": 0.6146526448849172, "grad_norm": 0.0699341589465008, "learning_rate": 0.00019465970506724235, "loss": 0.8462, "step": 35010 }, { "epoch": 0.6148282097649186, "grad_norm": 0.056854316111495426, "learning_rate": 0.0001946560939547034, "loss": 0.8502, "step": 35020 }, { "epoch": 0.6150037746449201, "grad_norm": 0.062496942147475074, "learning_rate": 0.0001946524816555238, "loss": 0.8525, "step": 35030 }, { "epoch": 0.6151793395249214, "grad_norm": 0.09785139797026242, "learning_rate": 0.00019464886816974934, "loss": 0.852, "step": 35040 }, { "epoch": 0.6153549044049228, "grad_norm": 0.05990644503817809, "learning_rate": 0.00019464525349742576, "loss": 0.8506, "step": 35050 }, { "epoch": 0.6155304692849243, "grad_norm": 0.05639479508290439, "learning_rate": 0.00019464163763859886, "loss": 0.8515, "step": 35060 }, { "epoch": 0.6157060341649256, "grad_norm": 0.06107081242997416, "learning_rate": 0.00019463802059331445, "loss": 0.8489, "step": 35070 }, { "epoch": 0.615881599044927, "grad_norm": 0.061323045401882895, "learning_rate": 0.00019463440236161842, "loss": 0.8605, "step": 35080 }, { "epoch": 0.6160571639249285, "grad_norm": 0.07425184240079015, "learning_rate": 0.00019463078294355653, "loss": 0.8502, "step": 35090 }, { "epoch": 0.6162327288049299, "grad_norm": 0.05686981818084413, "learning_rate": 0.00019462716233917471, "loss": 0.8518, "step": 35100 }, { "epoch": 0.6164082936849312, "grad_norm": 0.05353311030969614, "learning_rate": 0.00019462354054851876, "loss": 0.8634, "step": 35110 }, { "epoch": 0.6165838585649327, "grad_norm": 0.04669879776062381, "learning_rate": 0.00019461991757163463, "loss": 0.8483, "step": 35120 }, { "epoch": 0.6167594234449341, "grad_norm": 0.08159711820773151, "learning_rate": 0.0001946162934085682, "loss": 0.8529, "step": 35130 }, { "epoch": 0.6169349883249354, "grad_norm": 0.07477392707231849, "learning_rate": 0.0001946126680593654, "loss": 0.8571, "step": 35140 }, { "epoch": 0.6171105532049369, "grad_norm": 0.07083966007230696, "learning_rate": 0.00019460904152407215, "loss": 0.8509, "step": 35150 }, { "epoch": 0.6172861180849383, "grad_norm": 0.05950305218945452, "learning_rate": 0.00019460541380273439, "loss": 0.85, "step": 35160 }, { "epoch": 0.6174616829649396, "grad_norm": 0.05840870989158585, "learning_rate": 0.0001946017848953981, "loss": 0.8574, "step": 35170 }, { "epoch": 0.6176372478449411, "grad_norm": 0.0630857625531817, "learning_rate": 0.00019459815480210926, "loss": 0.8545, "step": 35180 }, { "epoch": 0.6178128127249425, "grad_norm": 0.11852715547995746, "learning_rate": 0.00019459452352291388, "loss": 0.8651, "step": 35190 }, { "epoch": 0.617988377604944, "grad_norm": 0.10820359625858686, "learning_rate": 0.00019459089105785792, "loss": 0.85, "step": 35200 }, { "epoch": 0.6181639424849453, "grad_norm": 0.07438104524092465, "learning_rate": 0.00019458725740698743, "loss": 0.8476, "step": 35210 }, { "epoch": 0.6183395073649467, "grad_norm": 0.06411775496824966, "learning_rate": 0.00019458362257034848, "loss": 0.8565, "step": 35220 }, { "epoch": 0.6185150722449482, "grad_norm": 0.06020952816501542, "learning_rate": 0.00019457998654798708, "loss": 0.8528, "step": 35230 }, { "epoch": 0.6186906371249495, "grad_norm": 0.06809620171079009, "learning_rate": 0.00019457634933994932, "loss": 0.8472, "step": 35240 }, { "epoch": 0.6188662020049509, "grad_norm": 0.07869469384170233, "learning_rate": 0.0001945727109462813, "loss": 0.8543, "step": 35250 }, { "epoch": 0.6190417668849524, "grad_norm": 0.0969606223284533, "learning_rate": 0.00019456907136702905, "loss": 0.856, "step": 35260 }, { "epoch": 0.6192173317649537, "grad_norm": 0.07657802789244514, "learning_rate": 0.00019456543060223877, "loss": 0.8468, "step": 35270 }, { "epoch": 0.6193928966449551, "grad_norm": 0.11840800771275703, "learning_rate": 0.00019456178865195656, "loss": 0.8569, "step": 35280 }, { "epoch": 0.6195684615249566, "grad_norm": 0.11782081658706912, "learning_rate": 0.00019455814551622855, "loss": 0.8507, "step": 35290 }, { "epoch": 0.619744026404958, "grad_norm": 0.051412046120139276, "learning_rate": 0.00019455450119510088, "loss": 0.8501, "step": 35300 }, { "epoch": 0.6199195912849593, "grad_norm": 0.060447155727302464, "learning_rate": 0.0001945508556886198, "loss": 0.8568, "step": 35310 }, { "epoch": 0.6200951561649608, "grad_norm": 0.056978118148601875, "learning_rate": 0.00019454720899683142, "loss": 0.8552, "step": 35320 }, { "epoch": 0.6202707210449622, "grad_norm": 0.07041942313975055, "learning_rate": 0.00019454356111978197, "loss": 0.8539, "step": 35330 }, { "epoch": 0.6204462859249635, "grad_norm": 0.06063322173155, "learning_rate": 0.00019453991205751767, "loss": 0.8517, "step": 35340 }, { "epoch": 0.620621850804965, "grad_norm": 0.054265375415042304, "learning_rate": 0.00019453626181008474, "loss": 0.8552, "step": 35350 }, { "epoch": 0.6207974156849664, "grad_norm": 0.06929662846308, "learning_rate": 0.0001945326103775295, "loss": 0.8401, "step": 35360 }, { "epoch": 0.6209729805649677, "grad_norm": 0.07075652924699205, "learning_rate": 0.00019452895775989816, "loss": 0.8524, "step": 35370 }, { "epoch": 0.6211485454449692, "grad_norm": 0.10783682228526828, "learning_rate": 0.00019452530395723693, "loss": 0.8513, "step": 35380 }, { "epoch": 0.6213241103249706, "grad_norm": 0.0828773076149495, "learning_rate": 0.00019452164896959222, "loss": 0.8552, "step": 35390 }, { "epoch": 0.621499675204972, "grad_norm": 0.06800421382588624, "learning_rate": 0.0001945179927970103, "loss": 0.8597, "step": 35400 }, { "epoch": 0.6216752400849734, "grad_norm": 0.09257435864103389, "learning_rate": 0.00019451433543953746, "loss": 0.8496, "step": 35410 }, { "epoch": 0.6218508049649748, "grad_norm": 0.0952315488951537, "learning_rate": 0.00019451067689722009, "loss": 0.85, "step": 35420 }, { "epoch": 0.6220263698449762, "grad_norm": 0.04250544748115005, "learning_rate": 0.00019450701717010454, "loss": 0.8576, "step": 35430 }, { "epoch": 0.6222019347249776, "grad_norm": 0.09417091775707515, "learning_rate": 0.00019450335625823714, "loss": 0.8535, "step": 35440 }, { "epoch": 0.622377499604979, "grad_norm": 0.06978252480478395, "learning_rate": 0.0001944996941616643, "loss": 0.8593, "step": 35450 }, { "epoch": 0.6225530644849804, "grad_norm": 0.0595719592262861, "learning_rate": 0.0001944960308804324, "loss": 0.8614, "step": 35460 }, { "epoch": 0.6227286293649819, "grad_norm": 0.06421082553200595, "learning_rate": 0.00019449236641458787, "loss": 0.8526, "step": 35470 }, { "epoch": 0.6229041942449832, "grad_norm": 0.04825250045597371, "learning_rate": 0.00019448870076417717, "loss": 0.8566, "step": 35480 }, { "epoch": 0.6230797591249846, "grad_norm": 0.057775415430948954, "learning_rate": 0.00019448503392924672, "loss": 0.8543, "step": 35490 }, { "epoch": 0.6232553240049861, "grad_norm": 0.06200843207263272, "learning_rate": 0.00019448136590984294, "loss": 0.8559, "step": 35500 }, { "epoch": 0.6234308888849874, "grad_norm": 0.06729193899831018, "learning_rate": 0.00019447769670601233, "loss": 0.8561, "step": 35510 }, { "epoch": 0.6236064537649888, "grad_norm": 0.06436754804426412, "learning_rate": 0.00019447402631780144, "loss": 0.8599, "step": 35520 }, { "epoch": 0.6237820186449903, "grad_norm": 0.06539739452354515, "learning_rate": 0.0001944703547452567, "loss": 0.8645, "step": 35530 }, { "epoch": 0.6239575835249916, "grad_norm": 0.053083807336956945, "learning_rate": 0.00019446668198842463, "loss": 0.8525, "step": 35540 }, { "epoch": 0.624133148404993, "grad_norm": 0.0749700342389716, "learning_rate": 0.00019446300804735176, "loss": 0.8522, "step": 35550 }, { "epoch": 0.6243087132849945, "grad_norm": 0.06392837979672508, "learning_rate": 0.00019445933292208472, "loss": 0.8644, "step": 35560 }, { "epoch": 0.6244842781649959, "grad_norm": 0.07479804828629034, "learning_rate": 0.00019445565661267003, "loss": 0.8543, "step": 35570 }, { "epoch": 0.6246598430449973, "grad_norm": 0.07714742521734454, "learning_rate": 0.00019445197911915422, "loss": 0.8567, "step": 35580 }, { "epoch": 0.6248354079249987, "grad_norm": 0.07212979777045361, "learning_rate": 0.00019444830044158394, "loss": 0.8586, "step": 35590 }, { "epoch": 0.6250109728050001, "grad_norm": 0.08013864706255329, "learning_rate": 0.00019444462058000578, "loss": 0.854, "step": 35600 }, { "epoch": 0.6251865376850015, "grad_norm": 0.08123172652829323, "learning_rate": 0.00019444093953446638, "loss": 0.862, "step": 35610 }, { "epoch": 0.6253621025650029, "grad_norm": 0.05798147529217454, "learning_rate": 0.00019443725730501233, "loss": 0.8573, "step": 35620 }, { "epoch": 0.6255376674450043, "grad_norm": 0.055017675113326975, "learning_rate": 0.00019443357389169037, "loss": 0.8507, "step": 35630 }, { "epoch": 0.6257132323250058, "grad_norm": 0.07599734709884388, "learning_rate": 0.0001944298892945471, "loss": 0.8515, "step": 35640 }, { "epoch": 0.6258887972050071, "grad_norm": 0.07687000643091679, "learning_rate": 0.00019442620351362925, "loss": 0.853, "step": 35650 }, { "epoch": 0.6260643620850085, "grad_norm": 0.07479447487599987, "learning_rate": 0.0001944225165489835, "loss": 0.8569, "step": 35660 }, { "epoch": 0.62623992696501, "grad_norm": 0.06641948546158757, "learning_rate": 0.00019441882840065652, "loss": 0.8523, "step": 35670 }, { "epoch": 0.6264154918450113, "grad_norm": 0.06156123953782701, "learning_rate": 0.0001944151390686951, "loss": 0.8596, "step": 35680 }, { "epoch": 0.6265910567250127, "grad_norm": 0.06228433821008845, "learning_rate": 0.00019441144855314598, "loss": 0.8556, "step": 35690 }, { "epoch": 0.6267666216050142, "grad_norm": 0.09107482336510285, "learning_rate": 0.00019440775685405587, "loss": 0.8605, "step": 35700 }, { "epoch": 0.6269421864850155, "grad_norm": 0.05458582870981273, "learning_rate": 0.0001944040639714716, "loss": 0.8515, "step": 35710 }, { "epoch": 0.6271177513650169, "grad_norm": 0.09173208687315132, "learning_rate": 0.00019440036990543995, "loss": 0.8489, "step": 35720 }, { "epoch": 0.6272933162450184, "grad_norm": 0.0681794116682946, "learning_rate": 0.0001943966746560077, "loss": 0.8537, "step": 35730 }, { "epoch": 0.6274688811250198, "grad_norm": 0.08587233644344451, "learning_rate": 0.00019439297822322168, "loss": 0.8461, "step": 35740 }, { "epoch": 0.6276444460050211, "grad_norm": 0.06356517416369904, "learning_rate": 0.00019438928060712873, "loss": 0.8459, "step": 35750 }, { "epoch": 0.6278200108850226, "grad_norm": 0.05177373409835948, "learning_rate": 0.00019438558180777572, "loss": 0.8524, "step": 35760 }, { "epoch": 0.627995575765024, "grad_norm": 0.06735479320037475, "learning_rate": 0.00019438188182520943, "loss": 0.8545, "step": 35770 }, { "epoch": 0.6281711406450253, "grad_norm": 0.04545458783677734, "learning_rate": 0.00019437818065947687, "loss": 0.8513, "step": 35780 }, { "epoch": 0.6283467055250268, "grad_norm": 0.0743030862743558, "learning_rate": 0.0001943744783106248, "loss": 0.8485, "step": 35790 }, { "epoch": 0.6285222704050282, "grad_norm": 0.06242227394235423, "learning_rate": 0.00019437077477870026, "loss": 0.8541, "step": 35800 }, { "epoch": 0.6286978352850295, "grad_norm": 0.057731829242700525, "learning_rate": 0.00019436707006375005, "loss": 0.851, "step": 35810 }, { "epoch": 0.628873400165031, "grad_norm": 0.062025293211090925, "learning_rate": 0.00019436336416582125, "loss": 0.8572, "step": 35820 }, { "epoch": 0.6290489650450324, "grad_norm": 0.0658025467520558, "learning_rate": 0.00019435965708496065, "loss": 0.8431, "step": 35830 }, { "epoch": 0.6292245299250337, "grad_norm": 0.057001104229437535, "learning_rate": 0.00019435594882121534, "loss": 0.8545, "step": 35840 }, { "epoch": 0.6294000948050352, "grad_norm": 0.05765310490469667, "learning_rate": 0.00019435223937463224, "loss": 0.8583, "step": 35850 }, { "epoch": 0.6295756596850366, "grad_norm": 0.07063938627794408, "learning_rate": 0.0001943485287452584, "loss": 0.8549, "step": 35860 }, { "epoch": 0.629751224565038, "grad_norm": 0.050729905105507095, "learning_rate": 0.0001943448169331408, "loss": 0.8576, "step": 35870 }, { "epoch": 0.6299267894450394, "grad_norm": 0.09129028499355137, "learning_rate": 0.0001943411039383265, "loss": 0.8517, "step": 35880 }, { "epoch": 0.6301023543250408, "grad_norm": 0.07732869895160206, "learning_rate": 0.00019433738976086252, "loss": 0.8514, "step": 35890 }, { "epoch": 0.6302779192050422, "grad_norm": 0.07663801325704016, "learning_rate": 0.00019433367440079593, "loss": 0.8494, "step": 35900 }, { "epoch": 0.6304534840850436, "grad_norm": 0.05256721152514931, "learning_rate": 0.00019432995785817378, "loss": 0.8594, "step": 35910 }, { "epoch": 0.630629048965045, "grad_norm": 0.07482238246995944, "learning_rate": 0.00019432624013304318, "loss": 0.859, "step": 35920 }, { "epoch": 0.6308046138450465, "grad_norm": 0.05195625898512678, "learning_rate": 0.0001943225212254512, "loss": 0.8439, "step": 35930 }, { "epoch": 0.6309801787250479, "grad_norm": 0.05836665698651033, "learning_rate": 0.00019431880113544506, "loss": 0.8556, "step": 35940 }, { "epoch": 0.6311557436050492, "grad_norm": 0.06637415270835795, "learning_rate": 0.00019431507986307178, "loss": 0.8505, "step": 35950 }, { "epoch": 0.6313313084850507, "grad_norm": 0.0653947570492513, "learning_rate": 0.00019431135740837854, "loss": 0.8558, "step": 35960 }, { "epoch": 0.6315068733650521, "grad_norm": 0.07278740518276308, "learning_rate": 0.00019430763377141256, "loss": 0.8619, "step": 35970 }, { "epoch": 0.6316824382450534, "grad_norm": 0.05890957119272087, "learning_rate": 0.00019430390895222094, "loss": 0.8538, "step": 35980 }, { "epoch": 0.6318580031250549, "grad_norm": 0.0746151396973331, "learning_rate": 0.00019430018295085095, "loss": 0.853, "step": 35990 }, { "epoch": 0.6320335680050563, "grad_norm": 0.056191638741705914, "learning_rate": 0.00019429645576734975, "loss": 0.8496, "step": 36000 }, { "epoch": 0.6322091328850576, "grad_norm": 0.05035942127316008, "learning_rate": 0.00019429272740176457, "loss": 0.8561, "step": 36010 }, { "epoch": 0.6323846977650591, "grad_norm": 0.07052213208075607, "learning_rate": 0.00019428899785414264, "loss": 0.8534, "step": 36020 }, { "epoch": 0.6325602626450605, "grad_norm": 0.05475529996117037, "learning_rate": 0.00019428526712453122, "loss": 0.8574, "step": 36030 }, { "epoch": 0.6327358275250619, "grad_norm": 0.08155595905734213, "learning_rate": 0.00019428153521297762, "loss": 0.8546, "step": 36040 }, { "epoch": 0.6329113924050633, "grad_norm": 0.07526565631650077, "learning_rate": 0.00019427780211952908, "loss": 0.8509, "step": 36050 }, { "epoch": 0.6330869572850647, "grad_norm": 0.04797682428152055, "learning_rate": 0.0001942740678442329, "loss": 0.853, "step": 36060 }, { "epoch": 0.6332625221650661, "grad_norm": 0.06358432686153061, "learning_rate": 0.00019427033238713645, "loss": 0.8561, "step": 36070 }, { "epoch": 0.6334380870450675, "grad_norm": 0.06505799840811059, "learning_rate": 0.00019426659574828695, "loss": 0.8546, "step": 36080 }, { "epoch": 0.6336136519250689, "grad_norm": 0.08030254511205151, "learning_rate": 0.00019426285792773185, "loss": 0.8567, "step": 36090 }, { "epoch": 0.6337892168050703, "grad_norm": 0.08157588032788955, "learning_rate": 0.00019425911892551845, "loss": 0.8501, "step": 36100 }, { "epoch": 0.6339647816850718, "grad_norm": 0.06870263335132609, "learning_rate": 0.00019425537874169412, "loss": 0.8508, "step": 36110 }, { "epoch": 0.6341403465650731, "grad_norm": 0.07160045281201786, "learning_rate": 0.00019425163737630632, "loss": 0.8475, "step": 36120 }, { "epoch": 0.6343159114450745, "grad_norm": 0.05822114303336733, "learning_rate": 0.00019424789482940235, "loss": 0.8555, "step": 36130 }, { "epoch": 0.634491476325076, "grad_norm": 0.05068342806106513, "learning_rate": 0.00019424415110102968, "loss": 0.8593, "step": 36140 }, { "epoch": 0.6346670412050773, "grad_norm": 0.061873293117991245, "learning_rate": 0.0001942404061912358, "loss": 0.8596, "step": 36150 }, { "epoch": 0.6348426060850787, "grad_norm": 0.051877590190828715, "learning_rate": 0.00019423666010006806, "loss": 0.8565, "step": 36160 }, { "epoch": 0.6350181709650802, "grad_norm": 0.06718572663230457, "learning_rate": 0.000194232912827574, "loss": 0.8592, "step": 36170 }, { "epoch": 0.6351937358450815, "grad_norm": 0.07219749948735053, "learning_rate": 0.00019422916437380106, "loss": 0.8443, "step": 36180 }, { "epoch": 0.6353693007250829, "grad_norm": 0.08080954873563949, "learning_rate": 0.0001942254147387967, "loss": 0.8612, "step": 36190 }, { "epoch": 0.6355448656050844, "grad_norm": 0.07246671536703048, "learning_rate": 0.00019422166392260852, "loss": 0.8605, "step": 36200 }, { "epoch": 0.6357204304850858, "grad_norm": 0.07713676763317144, "learning_rate": 0.00019421791192528396, "loss": 0.8456, "step": 36210 }, { "epoch": 0.6358959953650871, "grad_norm": 0.060114851381019865, "learning_rate": 0.0001942141587468706, "loss": 0.8473, "step": 36220 }, { "epoch": 0.6360715602450886, "grad_norm": 0.05181978553966608, "learning_rate": 0.00019421040438741597, "loss": 0.8511, "step": 36230 }, { "epoch": 0.63624712512509, "grad_norm": 0.06839396033294472, "learning_rate": 0.00019420664884696765, "loss": 0.8531, "step": 36240 }, { "epoch": 0.6364226900050913, "grad_norm": 0.06459867748021651, "learning_rate": 0.00019420289212557327, "loss": 0.8541, "step": 36250 }, { "epoch": 0.6365982548850928, "grad_norm": 0.0856833376884787, "learning_rate": 0.00019419913422328035, "loss": 0.8515, "step": 36260 }, { "epoch": 0.6367738197650942, "grad_norm": 0.06780356702193666, "learning_rate": 0.00019419537514013657, "loss": 0.8564, "step": 36270 }, { "epoch": 0.6369493846450955, "grad_norm": 0.07958979410284236, "learning_rate": 0.0001941916148761895, "loss": 0.8433, "step": 36280 }, { "epoch": 0.637124949525097, "grad_norm": 0.07850443744298666, "learning_rate": 0.0001941878534314868, "loss": 0.8458, "step": 36290 }, { "epoch": 0.6373005144050984, "grad_norm": 0.07707223822641375, "learning_rate": 0.00019418409080607617, "loss": 0.8516, "step": 36300 }, { "epoch": 0.6374760792850999, "grad_norm": 0.06214210032982156, "learning_rate": 0.0001941803270000052, "loss": 0.8515, "step": 36310 }, { "epoch": 0.6376516441651012, "grad_norm": 0.06355774981208874, "learning_rate": 0.00019417656201332168, "loss": 0.8497, "step": 36320 }, { "epoch": 0.6378272090451026, "grad_norm": 0.06067445704718926, "learning_rate": 0.00019417279584607323, "loss": 0.8514, "step": 36330 }, { "epoch": 0.6380027739251041, "grad_norm": 0.08770130500633533, "learning_rate": 0.0001941690284983076, "loss": 0.847, "step": 36340 }, { "epoch": 0.6381783388051054, "grad_norm": 0.06558576971820798, "learning_rate": 0.00019416525997007254, "loss": 0.8567, "step": 36350 }, { "epoch": 0.6383539036851068, "grad_norm": 0.05820107222837087, "learning_rate": 0.00019416149026141581, "loss": 0.8539, "step": 36360 }, { "epoch": 0.6385294685651083, "grad_norm": 0.05267804125794523, "learning_rate": 0.00019415771937238507, "loss": 0.8586, "step": 36370 }, { "epoch": 0.6387050334451096, "grad_norm": 0.08695477606851233, "learning_rate": 0.00019415394730302823, "loss": 0.851, "step": 36380 }, { "epoch": 0.638880598325111, "grad_norm": 0.06538505723661595, "learning_rate": 0.00019415017405339298, "loss": 0.8606, "step": 36390 }, { "epoch": 0.6390561632051125, "grad_norm": 0.06376383163971192, "learning_rate": 0.00019414639962352724, "loss": 0.8486, "step": 36400 }, { "epoch": 0.6392317280851139, "grad_norm": 0.04739020285717028, "learning_rate": 0.00019414262401347868, "loss": 0.8548, "step": 36410 }, { "epoch": 0.6394072929651152, "grad_norm": 0.06173938329739082, "learning_rate": 0.0001941388472232953, "loss": 0.8472, "step": 36420 }, { "epoch": 0.6395828578451167, "grad_norm": 0.04840986598326304, "learning_rate": 0.00019413506925302485, "loss": 0.8626, "step": 36430 }, { "epoch": 0.6397584227251181, "grad_norm": 0.06173354933683468, "learning_rate": 0.00019413129010271524, "loss": 0.8569, "step": 36440 }, { "epoch": 0.6399339876051194, "grad_norm": 0.043975034048418765, "learning_rate": 0.0001941275097724143, "loss": 0.8632, "step": 36450 }, { "epoch": 0.6401095524851209, "grad_norm": 0.07374775523059414, "learning_rate": 0.00019412372826216995, "loss": 0.8513, "step": 36460 }, { "epoch": 0.6402851173651223, "grad_norm": 0.07095778101191982, "learning_rate": 0.00019411994557203013, "loss": 0.8496, "step": 36470 }, { "epoch": 0.6404606822451236, "grad_norm": 0.0588199543716021, "learning_rate": 0.0001941161617020428, "loss": 0.8547, "step": 36480 }, { "epoch": 0.6406362471251251, "grad_norm": 0.05790530082306417, "learning_rate": 0.0001941123766522558, "loss": 0.8558, "step": 36490 }, { "epoch": 0.6408118120051265, "grad_norm": 0.0820573216689123, "learning_rate": 0.00019410859042271717, "loss": 0.851, "step": 36500 }, { "epoch": 0.6409873768851279, "grad_norm": 0.06959660918500564, "learning_rate": 0.00019410480301347484, "loss": 0.8515, "step": 36510 }, { "epoch": 0.6411629417651293, "grad_norm": 0.05388821726819028, "learning_rate": 0.0001941010144245768, "loss": 0.8506, "step": 36520 }, { "epoch": 0.6413385066451307, "grad_norm": 0.06397575300903521, "learning_rate": 0.00019409722465607113, "loss": 0.8553, "step": 36530 }, { "epoch": 0.6415140715251321, "grad_norm": 0.08310493491477118, "learning_rate": 0.00019409343370800573, "loss": 0.8484, "step": 36540 }, { "epoch": 0.6416896364051335, "grad_norm": 0.0793142164178133, "learning_rate": 0.00019408964158042867, "loss": 0.8501, "step": 36550 }, { "epoch": 0.6418652012851349, "grad_norm": 0.09197633468028114, "learning_rate": 0.00019408584827338804, "loss": 0.8474, "step": 36560 }, { "epoch": 0.6420407661651363, "grad_norm": 0.08012505785680628, "learning_rate": 0.00019408205378693185, "loss": 0.8558, "step": 36570 }, { "epoch": 0.6422163310451378, "grad_norm": 0.07553688354126001, "learning_rate": 0.0001940782581211082, "loss": 0.8617, "step": 36580 }, { "epoch": 0.6423918959251391, "grad_norm": 0.07068734646253913, "learning_rate": 0.00019407446127596523, "loss": 0.8555, "step": 36590 }, { "epoch": 0.6425674608051405, "grad_norm": 0.05181002428238948, "learning_rate": 0.00019407066325155095, "loss": 0.8485, "step": 36600 }, { "epoch": 0.642743025685142, "grad_norm": 0.0573593318759357, "learning_rate": 0.00019406686404791352, "loss": 0.8522, "step": 36610 }, { "epoch": 0.6429185905651433, "grad_norm": 0.07303750883194077, "learning_rate": 0.00019406306366510115, "loss": 0.8503, "step": 36620 }, { "epoch": 0.6430941554451447, "grad_norm": 0.0659211719524383, "learning_rate": 0.00019405926210316188, "loss": 0.8606, "step": 36630 }, { "epoch": 0.6432697203251462, "grad_norm": 0.0656886444006019, "learning_rate": 0.00019405545936214392, "loss": 0.8549, "step": 36640 }, { "epoch": 0.6434452852051475, "grad_norm": 0.062332585438194754, "learning_rate": 0.00019405165544209547, "loss": 0.8505, "step": 36650 }, { "epoch": 0.643620850085149, "grad_norm": 0.06841176714510738, "learning_rate": 0.0001940478503430647, "loss": 0.8551, "step": 36660 }, { "epoch": 0.6437964149651504, "grad_norm": 0.07150888512434354, "learning_rate": 0.00019404404406509986, "loss": 0.8561, "step": 36670 }, { "epoch": 0.6439719798451518, "grad_norm": 0.06379551699126415, "learning_rate": 0.0001940402366082491, "loss": 0.8476, "step": 36680 }, { "epoch": 0.6441475447251532, "grad_norm": 0.061140521720130285, "learning_rate": 0.00019403642797256073, "loss": 0.8533, "step": 36690 }, { "epoch": 0.6443231096051546, "grad_norm": 0.10444503389107626, "learning_rate": 0.00019403261815808306, "loss": 0.8493, "step": 36700 }, { "epoch": 0.644498674485156, "grad_norm": 0.0700846855138409, "learning_rate": 0.00019402880716486422, "loss": 0.8488, "step": 36710 }, { "epoch": 0.6446742393651574, "grad_norm": 0.06713513941886354, "learning_rate": 0.00019402499499295256, "loss": 0.8556, "step": 36720 }, { "epoch": 0.6448498042451588, "grad_norm": 0.061225876506984425, "learning_rate": 0.0001940211816423964, "loss": 0.849, "step": 36730 }, { "epoch": 0.6450253691251602, "grad_norm": 0.06802664143611026, "learning_rate": 0.00019401736711324404, "loss": 0.8557, "step": 36740 }, { "epoch": 0.6452009340051617, "grad_norm": 0.0842578402681984, "learning_rate": 0.0001940135514055438, "loss": 0.8554, "step": 36750 }, { "epoch": 0.645376498885163, "grad_norm": 0.048621232009313696, "learning_rate": 0.0001940097345193441, "loss": 0.8434, "step": 36760 }, { "epoch": 0.6455520637651644, "grad_norm": 0.09336965721386961, "learning_rate": 0.00019400591645469314, "loss": 0.8533, "step": 36770 }, { "epoch": 0.6457276286451659, "grad_norm": 0.06565779274386349, "learning_rate": 0.00019400209721163943, "loss": 0.8576, "step": 36780 }, { "epoch": 0.6459031935251672, "grad_norm": 0.06844731073120032, "learning_rate": 0.00019399827679023138, "loss": 0.8548, "step": 36790 }, { "epoch": 0.6460787584051686, "grad_norm": 0.06856352957673315, "learning_rate": 0.00019399445519051726, "loss": 0.8496, "step": 36800 }, { "epoch": 0.6462543232851701, "grad_norm": 0.053700853849284004, "learning_rate": 0.00019399063241254562, "loss": 0.8497, "step": 36810 }, { "epoch": 0.6464298881651714, "grad_norm": 0.06127983370055654, "learning_rate": 0.00019398680845636484, "loss": 0.8597, "step": 36820 }, { "epoch": 0.6466054530451728, "grad_norm": 0.08646198908870435, "learning_rate": 0.00019398298332202335, "loss": 0.8495, "step": 36830 }, { "epoch": 0.6467810179251743, "grad_norm": 0.08239851108001221, "learning_rate": 0.00019397915700956965, "loss": 0.8407, "step": 36840 }, { "epoch": 0.6469565828051757, "grad_norm": 0.06207546976664638, "learning_rate": 0.00019397532951905225, "loss": 0.845, "step": 36850 }, { "epoch": 0.647132147685177, "grad_norm": 0.08551758212191632, "learning_rate": 0.0001939715008505196, "loss": 0.8533, "step": 36860 }, { "epoch": 0.6473077125651785, "grad_norm": 0.0836928276485169, "learning_rate": 0.00019396767100402016, "loss": 0.8552, "step": 36870 }, { "epoch": 0.6474832774451799, "grad_norm": 0.07469543587171348, "learning_rate": 0.00019396383997960257, "loss": 0.8445, "step": 36880 }, { "epoch": 0.6476588423251812, "grad_norm": 0.05758658249101825, "learning_rate": 0.00019396000777731524, "loss": 0.8626, "step": 36890 }, { "epoch": 0.6478344072051827, "grad_norm": 0.05965928336856097, "learning_rate": 0.0001939561743972069, "loss": 0.8618, "step": 36900 }, { "epoch": 0.6480099720851841, "grad_norm": 0.12204435628010683, "learning_rate": 0.0001939523398393259, "loss": 0.8552, "step": 36910 }, { "epoch": 0.6481855369651854, "grad_norm": 0.06056068795873357, "learning_rate": 0.000193948504103721, "loss": 0.8563, "step": 36920 }, { "epoch": 0.6483611018451869, "grad_norm": 0.09568873045411537, "learning_rate": 0.00019394466719044077, "loss": 0.8559, "step": 36930 }, { "epoch": 0.6485366667251883, "grad_norm": 0.06076745235090655, "learning_rate": 0.00019394082909953374, "loss": 0.857, "step": 36940 }, { "epoch": 0.6487122316051896, "grad_norm": 0.06621688889768534, "learning_rate": 0.00019393698983104864, "loss": 0.8458, "step": 36950 }, { "epoch": 0.6488877964851911, "grad_norm": 0.04461924919043371, "learning_rate": 0.00019393314938503402, "loss": 0.8553, "step": 36960 }, { "epoch": 0.6490633613651925, "grad_norm": 0.061924014414468175, "learning_rate": 0.0001939293077615386, "loss": 0.8539, "step": 36970 }, { "epoch": 0.6492389262451939, "grad_norm": 0.06476495351455278, "learning_rate": 0.00019392546496061107, "loss": 0.8506, "step": 36980 }, { "epoch": 0.6494144911251953, "grad_norm": 0.06953577071739893, "learning_rate": 0.00019392162098230006, "loss": 0.8581, "step": 36990 }, { "epoch": 0.6495900560051967, "grad_norm": 0.0785118898999296, "learning_rate": 0.00019391777582665432, "loss": 0.8577, "step": 37000 }, { "epoch": 0.6497656208851981, "grad_norm": 0.05206107295997744, "learning_rate": 0.00019391392949372253, "loss": 0.8615, "step": 37010 }, { "epoch": 0.6499411857651995, "grad_norm": 0.06496737040581292, "learning_rate": 0.00019391008198355348, "loss": 0.861, "step": 37020 }, { "epoch": 0.6501167506452009, "grad_norm": 0.05692369045321024, "learning_rate": 0.00019390623329619585, "loss": 0.8527, "step": 37030 }, { "epoch": 0.6502923155252024, "grad_norm": 0.05305745381489499, "learning_rate": 0.00019390238343169848, "loss": 0.8547, "step": 37040 }, { "epoch": 0.6504678804052038, "grad_norm": 0.04733323803046007, "learning_rate": 0.00019389853239011007, "loss": 0.8553, "step": 37050 }, { "epoch": 0.6506434452852051, "grad_norm": 0.06540989739031151, "learning_rate": 0.00019389468017147945, "loss": 0.8561, "step": 37060 }, { "epoch": 0.6508190101652066, "grad_norm": 0.06167428874051344, "learning_rate": 0.00019389082677585546, "loss": 0.8565, "step": 37070 }, { "epoch": 0.650994575045208, "grad_norm": 0.06191360125354541, "learning_rate": 0.00019388697220328686, "loss": 0.8482, "step": 37080 }, { "epoch": 0.6511701399252093, "grad_norm": 0.052200485125201194, "learning_rate": 0.00019388311645382255, "loss": 0.8599, "step": 37090 }, { "epoch": 0.6513457048052108, "grad_norm": 0.05238262424281822, "learning_rate": 0.00019387925952751137, "loss": 0.8465, "step": 37100 }, { "epoch": 0.6515212696852122, "grad_norm": 0.0650624249456386, "learning_rate": 0.0001938754014244021, "loss": 0.8537, "step": 37110 }, { "epoch": 0.6516968345652135, "grad_norm": 0.060728436266565156, "learning_rate": 0.00019387154214454372, "loss": 0.852, "step": 37120 }, { "epoch": 0.651872399445215, "grad_norm": 0.07147667386646017, "learning_rate": 0.00019386768168798513, "loss": 0.8552, "step": 37130 }, { "epoch": 0.6520479643252164, "grad_norm": 0.07186009586091582, "learning_rate": 0.0001938638200547752, "loss": 0.8594, "step": 37140 }, { "epoch": 0.6522235292052178, "grad_norm": 0.07252579646041139, "learning_rate": 0.00019385995724496288, "loss": 0.8568, "step": 37150 }, { "epoch": 0.6523990940852192, "grad_norm": 0.04992076063858382, "learning_rate": 0.00019385609325859712, "loss": 0.8553, "step": 37160 }, { "epoch": 0.6525746589652206, "grad_norm": 0.1156216386538427, "learning_rate": 0.00019385222809572682, "loss": 0.8492, "step": 37170 }, { "epoch": 0.652750223845222, "grad_norm": 0.06597385893622341, "learning_rate": 0.00019384836175640103, "loss": 0.8597, "step": 37180 }, { "epoch": 0.6529257887252234, "grad_norm": 0.09201002113372127, "learning_rate": 0.0001938444942406687, "loss": 0.861, "step": 37190 }, { "epoch": 0.6531013536052248, "grad_norm": 0.05997204217787153, "learning_rate": 0.0001938406255485788, "loss": 0.8565, "step": 37200 }, { "epoch": 0.6532769184852262, "grad_norm": 0.06725761373097415, "learning_rate": 0.0001938367556801804, "loss": 0.8536, "step": 37210 }, { "epoch": 0.6534524833652277, "grad_norm": 0.048547706175447805, "learning_rate": 0.00019383288463552253, "loss": 0.8462, "step": 37220 }, { "epoch": 0.653628048245229, "grad_norm": 0.0854249613098066, "learning_rate": 0.0001938290124146542, "loss": 0.8504, "step": 37230 }, { "epoch": 0.6538036131252304, "grad_norm": 0.10695412726949444, "learning_rate": 0.00019382513901762451, "loss": 0.8563, "step": 37240 }, { "epoch": 0.6539791780052319, "grad_norm": 0.059549706245784706, "learning_rate": 0.00019382126444448253, "loss": 0.8528, "step": 37250 }, { "epoch": 0.6541547428852332, "grad_norm": 0.06480454683593952, "learning_rate": 0.0001938173886952773, "loss": 0.8552, "step": 37260 }, { "epoch": 0.6543303077652346, "grad_norm": 0.052992262233834436, "learning_rate": 0.00019381351177005798, "loss": 0.8503, "step": 37270 }, { "epoch": 0.6545058726452361, "grad_norm": 0.0840218871094458, "learning_rate": 0.00019380963366887372, "loss": 0.8532, "step": 37280 }, { "epoch": 0.6546814375252374, "grad_norm": 0.08335825080738776, "learning_rate": 0.00019380575439177357, "loss": 0.8578, "step": 37290 }, { "epoch": 0.6548570024052388, "grad_norm": 0.057238469421032806, "learning_rate": 0.00019380187393880672, "loss": 0.8564, "step": 37300 }, { "epoch": 0.6550325672852403, "grad_norm": 0.07772996567163555, "learning_rate": 0.00019379799231002235, "loss": 0.8573, "step": 37310 }, { "epoch": 0.6552081321652417, "grad_norm": 0.055074023597818295, "learning_rate": 0.00019379410950546967, "loss": 0.8515, "step": 37320 }, { "epoch": 0.655383697045243, "grad_norm": 0.074800004767543, "learning_rate": 0.0001937902255251978, "loss": 0.8528, "step": 37330 }, { "epoch": 0.6555592619252445, "grad_norm": 0.06819611774728637, "learning_rate": 0.000193786340369256, "loss": 0.8536, "step": 37340 }, { "epoch": 0.6557348268052459, "grad_norm": 0.0751267868098657, "learning_rate": 0.0001937824540376935, "loss": 0.8549, "step": 37350 }, { "epoch": 0.6559103916852472, "grad_norm": 0.05917351128923754, "learning_rate": 0.0001937785665305595, "loss": 0.8531, "step": 37360 }, { "epoch": 0.6560859565652487, "grad_norm": 0.052737832642472025, "learning_rate": 0.00019377467784790331, "loss": 0.8513, "step": 37370 }, { "epoch": 0.6562615214452501, "grad_norm": 0.061557127069232956, "learning_rate": 0.0001937707879897741, "loss": 0.8507, "step": 37380 }, { "epoch": 0.6564370863252516, "grad_norm": 0.06521211198992052, "learning_rate": 0.0001937668969562213, "loss": 0.8538, "step": 37390 }, { "epoch": 0.6566126512052529, "grad_norm": 0.07378682797359613, "learning_rate": 0.00019376300474729412, "loss": 0.8545, "step": 37400 }, { "epoch": 0.6567882160852543, "grad_norm": 0.07702464063341834, "learning_rate": 0.0001937591113630419, "loss": 0.8544, "step": 37410 }, { "epoch": 0.6569637809652558, "grad_norm": 0.08383112344379849, "learning_rate": 0.00019375521680351393, "loss": 0.8544, "step": 37420 }, { "epoch": 0.6571393458452571, "grad_norm": 0.07530485328552966, "learning_rate": 0.00019375132106875961, "loss": 0.8552, "step": 37430 }, { "epoch": 0.6573149107252585, "grad_norm": 0.0544413732347663, "learning_rate": 0.00019374742415882829, "loss": 0.8432, "step": 37440 }, { "epoch": 0.65749047560526, "grad_norm": 0.06160155077162333, "learning_rate": 0.0001937435260737693, "loss": 0.8484, "step": 37450 }, { "epoch": 0.6576660404852613, "grad_norm": 0.0952057739173672, "learning_rate": 0.0001937396268136321, "loss": 0.8582, "step": 37460 }, { "epoch": 0.6578416053652627, "grad_norm": 0.05244839661665075, "learning_rate": 0.00019373572637846603, "loss": 0.8535, "step": 37470 }, { "epoch": 0.6580171702452642, "grad_norm": 0.06187155209555895, "learning_rate": 0.0001937318247683205, "loss": 0.8552, "step": 37480 }, { "epoch": 0.6581927351252655, "grad_norm": 0.0584040938235431, "learning_rate": 0.00019372792198324502, "loss": 0.8559, "step": 37490 }, { "epoch": 0.6583683000052669, "grad_norm": 0.055474237687724116, "learning_rate": 0.00019372401802328901, "loss": 0.8438, "step": 37500 }, { "epoch": 0.6585438648852684, "grad_norm": 0.0524119338954262, "learning_rate": 0.00019372011288850189, "loss": 0.8484, "step": 37510 }, { "epoch": 0.6587194297652698, "grad_norm": 0.05103332671480185, "learning_rate": 0.00019371620657893316, "loss": 0.847, "step": 37520 }, { "epoch": 0.6588949946452711, "grad_norm": 0.09493167130321829, "learning_rate": 0.00019371229909463235, "loss": 0.8533, "step": 37530 }, { "epoch": 0.6590705595252726, "grad_norm": 0.06317861540815776, "learning_rate": 0.00019370839043564894, "loss": 0.8554, "step": 37540 }, { "epoch": 0.659246124405274, "grad_norm": 0.07241413700919462, "learning_rate": 0.00019370448060203246, "loss": 0.8489, "step": 37550 }, { "epoch": 0.6594216892852753, "grad_norm": 0.07442804515717467, "learning_rate": 0.0001937005695938324, "loss": 0.8592, "step": 37560 }, { "epoch": 0.6595972541652768, "grad_norm": 0.05907274198172103, "learning_rate": 0.00019369665741109842, "loss": 0.8528, "step": 37570 }, { "epoch": 0.6597728190452782, "grad_norm": 0.07008583989482692, "learning_rate": 0.00019369274405387998, "loss": 0.85, "step": 37580 }, { "epoch": 0.6599483839252795, "grad_norm": 0.06629954575937737, "learning_rate": 0.00019368882952222673, "loss": 0.8531, "step": 37590 }, { "epoch": 0.660123948805281, "grad_norm": 0.0657371451834434, "learning_rate": 0.00019368491381618828, "loss": 0.8632, "step": 37600 }, { "epoch": 0.6602995136852824, "grad_norm": 0.05998068569773772, "learning_rate": 0.00019368099693581415, "loss": 0.8439, "step": 37610 }, { "epoch": 0.6604750785652838, "grad_norm": 0.05304750448711977, "learning_rate": 0.00019367707888115406, "loss": 0.8536, "step": 37620 }, { "epoch": 0.6606506434452852, "grad_norm": 0.12268445650693684, "learning_rate": 0.0001936731596522576, "loss": 0.8542, "step": 37630 }, { "epoch": 0.6608262083252866, "grad_norm": 0.07169785483504959, "learning_rate": 0.00019366923924917445, "loss": 0.86, "step": 37640 }, { "epoch": 0.661001773205288, "grad_norm": 0.06573221563619855, "learning_rate": 0.0001936653176719543, "loss": 0.8487, "step": 37650 }, { "epoch": 0.6611773380852894, "grad_norm": 0.07629495591588048, "learning_rate": 0.00019366139492064678, "loss": 0.8511, "step": 37660 }, { "epoch": 0.6613529029652908, "grad_norm": 0.05404632399896313, "learning_rate": 0.00019365747099530164, "loss": 0.858, "step": 37670 }, { "epoch": 0.6615284678452922, "grad_norm": 0.05610280941789637, "learning_rate": 0.0001936535458959686, "loss": 0.859, "step": 37680 }, { "epoch": 0.6617040327252937, "grad_norm": 0.06168636363671716, "learning_rate": 0.0001936496196226974, "loss": 0.8589, "step": 37690 }, { "epoch": 0.661879597605295, "grad_norm": 0.05932972821608176, "learning_rate": 0.0001936456921755377, "loss": 0.8554, "step": 37700 }, { "epoch": 0.6620551624852964, "grad_norm": 0.04937703482295075, "learning_rate": 0.00019364176355453935, "loss": 0.8568, "step": 37710 }, { "epoch": 0.6622307273652979, "grad_norm": 0.08329938439856557, "learning_rate": 0.0001936378337597521, "loss": 0.8563, "step": 37720 }, { "epoch": 0.6624062922452992, "grad_norm": 0.05902381127889509, "learning_rate": 0.00019363390279122575, "loss": 0.8502, "step": 37730 }, { "epoch": 0.6625818571253006, "grad_norm": 0.05623210413911416, "learning_rate": 0.0001936299706490101, "loss": 0.8555, "step": 37740 }, { "epoch": 0.6627574220053021, "grad_norm": 0.05133706041623081, "learning_rate": 0.00019362603733315495, "loss": 0.8509, "step": 37750 }, { "epoch": 0.6629329868853034, "grad_norm": 0.11011880523108593, "learning_rate": 0.00019362210284371018, "loss": 0.8571, "step": 37760 }, { "epoch": 0.6631085517653049, "grad_norm": 0.056125700670241994, "learning_rate": 0.00019361816718072562, "loss": 0.8491, "step": 37770 }, { "epoch": 0.6632841166453063, "grad_norm": 0.06945032734786988, "learning_rate": 0.00019361423034425113, "loss": 0.8571, "step": 37780 }, { "epoch": 0.6634596815253077, "grad_norm": 0.0630911025901408, "learning_rate": 0.00019361029233433657, "loss": 0.8621, "step": 37790 }, { "epoch": 0.6636352464053091, "grad_norm": 0.08774168477957467, "learning_rate": 0.00019360635315103186, "loss": 0.8589, "step": 37800 }, { "epoch": 0.6638108112853105, "grad_norm": 0.07719629346190231, "learning_rate": 0.0001936024127943869, "loss": 0.8576, "step": 37810 }, { "epoch": 0.6639863761653119, "grad_norm": 0.0637031205071741, "learning_rate": 0.00019359847126445166, "loss": 0.8507, "step": 37820 }, { "epoch": 0.6641619410453133, "grad_norm": 0.07173186053671246, "learning_rate": 0.000193594528561276, "loss": 0.8553, "step": 37830 }, { "epoch": 0.6643375059253147, "grad_norm": 0.05762728689472479, "learning_rate": 0.00019359058468490997, "loss": 0.8554, "step": 37840 }, { "epoch": 0.6645130708053161, "grad_norm": 0.06257446837105776, "learning_rate": 0.00019358663963540344, "loss": 0.8581, "step": 37850 }, { "epoch": 0.6646886356853176, "grad_norm": 0.07737467716384697, "learning_rate": 0.00019358269341280648, "loss": 0.8556, "step": 37860 }, { "epoch": 0.6648642005653189, "grad_norm": 0.06481174655334365, "learning_rate": 0.00019357874601716902, "loss": 0.8572, "step": 37870 }, { "epoch": 0.6650397654453203, "grad_norm": 0.0849916077242286, "learning_rate": 0.00019357479744854113, "loss": 0.8572, "step": 37880 }, { "epoch": 0.6652153303253218, "grad_norm": 0.08019375369849202, "learning_rate": 0.0001935708477069728, "loss": 0.8484, "step": 37890 }, { "epoch": 0.6653908952053231, "grad_norm": 0.04663846251202346, "learning_rate": 0.00019356689679251408, "loss": 0.8467, "step": 37900 }, { "epoch": 0.6655664600853245, "grad_norm": 0.0815017942556124, "learning_rate": 0.00019356294470521506, "loss": 0.8512, "step": 37910 }, { "epoch": 0.665742024965326, "grad_norm": 0.05352204645032317, "learning_rate": 0.00019355899144512574, "loss": 0.8574, "step": 37920 }, { "epoch": 0.6659175898453273, "grad_norm": 0.05049978544171157, "learning_rate": 0.0001935550370122963, "loss": 0.8518, "step": 37930 }, { "epoch": 0.6660931547253287, "grad_norm": 0.07150430081331152, "learning_rate": 0.0001935510814067768, "loss": 0.8491, "step": 37940 }, { "epoch": 0.6662687196053302, "grad_norm": 0.06538086035413604, "learning_rate": 0.00019354712462861737, "loss": 0.844, "step": 37950 }, { "epoch": 0.6664442844853316, "grad_norm": 0.06699699572194485, "learning_rate": 0.0001935431666778681, "loss": 0.8547, "step": 37960 }, { "epoch": 0.6666198493653329, "grad_norm": 0.08190437415819164, "learning_rate": 0.00019353920755457921, "loss": 0.8564, "step": 37970 }, { "epoch": 0.6667954142453344, "grad_norm": 0.06734593343604438, "learning_rate": 0.00019353524725880083, "loss": 0.8523, "step": 37980 }, { "epoch": 0.6669709791253358, "grad_norm": 0.08204448515188016, "learning_rate": 0.0001935312857905831, "loss": 0.8575, "step": 37990 }, { "epoch": 0.6671465440053371, "grad_norm": 0.062112684606804865, "learning_rate": 0.00019352732314997627, "loss": 0.8532, "step": 38000 }, { "epoch": 0.6673221088853386, "grad_norm": 0.05615120423314835, "learning_rate": 0.0001935233593370305, "loss": 0.8469, "step": 38010 }, { "epoch": 0.66749767376534, "grad_norm": 0.08780748667504068, "learning_rate": 0.00019351939435179605, "loss": 0.8554, "step": 38020 }, { "epoch": 0.6676732386453413, "grad_norm": 0.07200007753811391, "learning_rate": 0.00019351542819432313, "loss": 0.8574, "step": 38030 }, { "epoch": 0.6678488035253428, "grad_norm": 0.06828041013773944, "learning_rate": 0.00019351146086466202, "loss": 0.8527, "step": 38040 }, { "epoch": 0.6680243684053442, "grad_norm": 0.06736967113016043, "learning_rate": 0.00019350749236286298, "loss": 0.8484, "step": 38050 }, { "epoch": 0.6681999332853455, "grad_norm": 0.06476182808661356, "learning_rate": 0.00019350352268897626, "loss": 0.8565, "step": 38060 }, { "epoch": 0.668375498165347, "grad_norm": 0.058755311279339584, "learning_rate": 0.00019349955184305218, "loss": 0.8556, "step": 38070 }, { "epoch": 0.6685510630453484, "grad_norm": 0.0931523051048159, "learning_rate": 0.00019349557982514107, "loss": 0.8471, "step": 38080 }, { "epoch": 0.6687266279253498, "grad_norm": 0.05676744715878483, "learning_rate": 0.00019349160663529325, "loss": 0.857, "step": 38090 }, { "epoch": 0.6689021928053512, "grad_norm": 0.05059909715748807, "learning_rate": 0.000193487632273559, "loss": 0.8565, "step": 38100 }, { "epoch": 0.6690777576853526, "grad_norm": 0.07809517341834823, "learning_rate": 0.00019348365673998874, "loss": 0.8529, "step": 38110 }, { "epoch": 0.6692533225653541, "grad_norm": 0.0584716014164441, "learning_rate": 0.00019347968003463283, "loss": 0.8583, "step": 38120 }, { "epoch": 0.6694288874453554, "grad_norm": 0.06662936901938107, "learning_rate": 0.00019347570215754166, "loss": 0.8603, "step": 38130 }, { "epoch": 0.6696044523253568, "grad_norm": 0.06586392234386014, "learning_rate": 0.0001934717231087656, "loss": 0.8608, "step": 38140 }, { "epoch": 0.6697800172053583, "grad_norm": 0.10623083761347614, "learning_rate": 0.0001934677428883551, "loss": 0.8532, "step": 38150 }, { "epoch": 0.6699555820853597, "grad_norm": 0.09638475995923605, "learning_rate": 0.0001934637614963606, "loss": 0.8516, "step": 38160 }, { "epoch": 0.670131146965361, "grad_norm": 0.058795422311076415, "learning_rate": 0.00019345977893283248, "loss": 0.8607, "step": 38170 }, { "epoch": 0.6703067118453625, "grad_norm": 0.05649260063757745, "learning_rate": 0.00019345579519782126, "loss": 0.8532, "step": 38180 }, { "epoch": 0.6704822767253639, "grad_norm": 0.07299982565979918, "learning_rate": 0.00019345181029137737, "loss": 0.8495, "step": 38190 }, { "epoch": 0.6706578416053652, "grad_norm": 0.05793285622496156, "learning_rate": 0.00019344782421355135, "loss": 0.8587, "step": 38200 }, { "epoch": 0.6708334064853667, "grad_norm": 0.05833669304085181, "learning_rate": 0.0001934438369643937, "loss": 0.8482, "step": 38210 }, { "epoch": 0.6710089713653681, "grad_norm": 0.0651532540557894, "learning_rate": 0.00019343984854395488, "loss": 0.8454, "step": 38220 }, { "epoch": 0.6711845362453694, "grad_norm": 0.07119110329659442, "learning_rate": 0.00019343585895228549, "loss": 0.8585, "step": 38230 }, { "epoch": 0.6713601011253709, "grad_norm": 0.04163655715027227, "learning_rate": 0.00019343186818943607, "loss": 0.8548, "step": 38240 }, { "epoch": 0.6715356660053723, "grad_norm": 0.061613283317418326, "learning_rate": 0.00019342787625545714, "loss": 0.8614, "step": 38250 }, { "epoch": 0.6717112308853737, "grad_norm": 0.05157890872554061, "learning_rate": 0.00019342388315039927, "loss": 0.8523, "step": 38260 }, { "epoch": 0.6718867957653751, "grad_norm": 0.061386481554547775, "learning_rate": 0.00019341988887431312, "loss": 0.8527, "step": 38270 }, { "epoch": 0.6720623606453765, "grad_norm": 0.0798198824291417, "learning_rate": 0.00019341589342724926, "loss": 0.851, "step": 38280 }, { "epoch": 0.6722379255253779, "grad_norm": 0.05031272671096633, "learning_rate": 0.00019341189680925836, "loss": 0.8616, "step": 38290 }, { "epoch": 0.6724134904053793, "grad_norm": 0.05479447089001072, "learning_rate": 0.00019340789902039097, "loss": 0.8547, "step": 38300 }, { "epoch": 0.6725890552853807, "grad_norm": 0.06697802113164197, "learning_rate": 0.0001934039000606978, "loss": 0.8555, "step": 38310 }, { "epoch": 0.6727646201653821, "grad_norm": 0.060141097433892836, "learning_rate": 0.00019339989993022948, "loss": 0.8584, "step": 38320 }, { "epoch": 0.6729401850453836, "grad_norm": 0.055329847827215, "learning_rate": 0.00019339589862903676, "loss": 0.8484, "step": 38330 }, { "epoch": 0.6731157499253849, "grad_norm": 0.060419160437882743, "learning_rate": 0.00019339189615717031, "loss": 0.8539, "step": 38340 }, { "epoch": 0.6732913148053863, "grad_norm": 0.054023037183733885, "learning_rate": 0.00019338789251468076, "loss": 0.8531, "step": 38350 }, { "epoch": 0.6734668796853878, "grad_norm": 0.08802178944611289, "learning_rate": 0.00019338388770161896, "loss": 0.861, "step": 38360 }, { "epoch": 0.6736424445653891, "grad_norm": 0.09752123999721078, "learning_rate": 0.00019337988171803559, "loss": 0.8555, "step": 38370 }, { "epoch": 0.6738180094453905, "grad_norm": 0.05774619508475413, "learning_rate": 0.0001933758745639814, "loss": 0.85, "step": 38380 }, { "epoch": 0.673993574325392, "grad_norm": 0.048937483933054374, "learning_rate": 0.0001933718662395072, "loss": 0.8502, "step": 38390 }, { "epoch": 0.6741691392053933, "grad_norm": 0.08287386001195325, "learning_rate": 0.00019336785674466375, "loss": 0.8516, "step": 38400 }, { "epoch": 0.6743447040853947, "grad_norm": 0.05978697925761371, "learning_rate": 0.00019336384607950181, "loss": 0.8505, "step": 38410 }, { "epoch": 0.6745202689653962, "grad_norm": 0.06886447323597256, "learning_rate": 0.00019335983424407228, "loss": 0.8558, "step": 38420 }, { "epoch": 0.6746958338453976, "grad_norm": 0.0950789971308266, "learning_rate": 0.00019335582123842596, "loss": 0.8633, "step": 38430 }, { "epoch": 0.6748713987253989, "grad_norm": 0.08032533504576125, "learning_rate": 0.00019335180706261365, "loss": 0.8586, "step": 38440 }, { "epoch": 0.6750469636054004, "grad_norm": 0.057238656686856876, "learning_rate": 0.00019334779171668623, "loss": 0.8472, "step": 38450 }, { "epoch": 0.6752225284854018, "grad_norm": 0.05968640403986131, "learning_rate": 0.00019334377520069463, "loss": 0.856, "step": 38460 }, { "epoch": 0.6753980933654031, "grad_norm": 0.0642107965970955, "learning_rate": 0.0001933397575146897, "loss": 0.8586, "step": 38470 }, { "epoch": 0.6755736582454046, "grad_norm": 0.06006490875712044, "learning_rate": 0.00019333573865872234, "loss": 0.8544, "step": 38480 }, { "epoch": 0.675749223125406, "grad_norm": 0.05519994110777395, "learning_rate": 0.00019333171863284345, "loss": 0.8537, "step": 38490 }, { "epoch": 0.6759247880054075, "grad_norm": 0.06055860848526909, "learning_rate": 0.00019332769743710403, "loss": 0.8539, "step": 38500 }, { "epoch": 0.6761003528854088, "grad_norm": 0.053168845089898914, "learning_rate": 0.000193323675071555, "loss": 0.8525, "step": 38510 }, { "epoch": 0.6762759177654102, "grad_norm": 0.06885724077961501, "learning_rate": 0.00019331965153624725, "loss": 0.85, "step": 38520 }, { "epoch": 0.6764514826454117, "grad_norm": 0.06850288855808581, "learning_rate": 0.00019331562683123186, "loss": 0.856, "step": 38530 }, { "epoch": 0.676627047525413, "grad_norm": 0.07292865868603024, "learning_rate": 0.0001933116009565598, "loss": 0.8535, "step": 38540 }, { "epoch": 0.6768026124054144, "grad_norm": 0.06846372645073082, "learning_rate": 0.00019330757391228205, "loss": 0.8505, "step": 38550 }, { "epoch": 0.6769781772854159, "grad_norm": 0.058091477946306376, "learning_rate": 0.0001933035456984496, "loss": 0.8495, "step": 38560 }, { "epoch": 0.6771537421654172, "grad_norm": 0.05989839426993255, "learning_rate": 0.0001932995163151136, "loss": 0.8499, "step": 38570 }, { "epoch": 0.6773293070454186, "grad_norm": 0.09240247662497077, "learning_rate": 0.000193295485762325, "loss": 0.8565, "step": 38580 }, { "epoch": 0.6775048719254201, "grad_norm": 0.057063204172556496, "learning_rate": 0.00019329145404013495, "loss": 0.8547, "step": 38590 }, { "epoch": 0.6776804368054214, "grad_norm": 0.15279920991728776, "learning_rate": 0.00019328742114859443, "loss": 0.8586, "step": 38600 }, { "epoch": 0.6778560016854228, "grad_norm": 0.08301381738396195, "learning_rate": 0.00019328338708775463, "loss": 0.8558, "step": 38610 }, { "epoch": 0.6780315665654243, "grad_norm": 0.07332676076301292, "learning_rate": 0.00019327935185766665, "loss": 0.8513, "step": 38620 }, { "epoch": 0.6782071314454257, "grad_norm": 0.04996566281264174, "learning_rate": 0.00019327531545838158, "loss": 0.8602, "step": 38630 }, { "epoch": 0.678382696325427, "grad_norm": 0.07444060929298968, "learning_rate": 0.0001932712778899506, "loss": 0.8489, "step": 38640 }, { "epoch": 0.6785582612054285, "grad_norm": 0.05838653074748976, "learning_rate": 0.00019326723915242478, "loss": 0.8592, "step": 38650 }, { "epoch": 0.6787338260854299, "grad_norm": 0.08236897116036301, "learning_rate": 0.0001932631992458554, "loss": 0.8452, "step": 38660 }, { "epoch": 0.6789093909654312, "grad_norm": 0.06238003302606617, "learning_rate": 0.00019325915817029364, "loss": 0.8512, "step": 38670 }, { "epoch": 0.6790849558454327, "grad_norm": 0.07658942518419591, "learning_rate": 0.0001932551159257906, "loss": 0.8503, "step": 38680 }, { "epoch": 0.6792605207254341, "grad_norm": 0.051652297465904655, "learning_rate": 0.0001932510725123976, "loss": 0.863, "step": 38690 }, { "epoch": 0.6794360856054354, "grad_norm": 0.058276152637775726, "learning_rate": 0.0001932470279301658, "loss": 0.8533, "step": 38700 }, { "epoch": 0.6796116504854369, "grad_norm": 0.05188917669731097, "learning_rate": 0.0001932429821791465, "loss": 0.8601, "step": 38710 }, { "epoch": 0.6797872153654383, "grad_norm": 0.0852786127616298, "learning_rate": 0.00019323893525939094, "loss": 0.8509, "step": 38720 }, { "epoch": 0.6799627802454397, "grad_norm": 0.05371347240418529, "learning_rate": 0.0001932348871709504, "loss": 0.8516, "step": 38730 }, { "epoch": 0.6801383451254411, "grad_norm": 0.06175767364634412, "learning_rate": 0.00019323083791387613, "loss": 0.846, "step": 38740 }, { "epoch": 0.6803139100054425, "grad_norm": 0.06482905751299804, "learning_rate": 0.0001932267874882195, "loss": 0.8532, "step": 38750 }, { "epoch": 0.6804894748854439, "grad_norm": 0.06701586287069444, "learning_rate": 0.0001932227358940318, "loss": 0.8584, "step": 38760 }, { "epoch": 0.6806650397654453, "grad_norm": 0.08841277703472471, "learning_rate": 0.00019321868313136432, "loss": 0.8506, "step": 38770 }, { "epoch": 0.6808406046454467, "grad_norm": 0.052896336535333564, "learning_rate": 0.00019321462920026848, "loss": 0.8558, "step": 38780 }, { "epoch": 0.6810161695254481, "grad_norm": 0.06982670010308453, "learning_rate": 0.0001932105741007956, "loss": 0.8562, "step": 38790 }, { "epoch": 0.6811917344054496, "grad_norm": 0.12828401747542187, "learning_rate": 0.0001932065178329971, "loss": 0.8518, "step": 38800 }, { "epoch": 0.6813672992854509, "grad_norm": 0.0736766727163479, "learning_rate": 0.00019320246039692434, "loss": 0.8486, "step": 38810 }, { "epoch": 0.6815428641654523, "grad_norm": 0.06060630103327168, "learning_rate": 0.0001931984017926287, "loss": 0.8561, "step": 38820 }, { "epoch": 0.6817184290454538, "grad_norm": 0.0596949079670509, "learning_rate": 0.00019319434202016172, "loss": 0.854, "step": 38830 }, { "epoch": 0.6818939939254551, "grad_norm": 0.0884984598032678, "learning_rate": 0.0001931902810795747, "loss": 0.8438, "step": 38840 }, { "epoch": 0.6820695588054566, "grad_norm": 0.07427328943601644, "learning_rate": 0.00019318621897091915, "loss": 0.8578, "step": 38850 }, { "epoch": 0.682245123685458, "grad_norm": 0.0695403498132366, "learning_rate": 0.00019318215569424655, "loss": 0.8587, "step": 38860 }, { "epoch": 0.6824206885654593, "grad_norm": 0.052921105612545385, "learning_rate": 0.00019317809124960836, "loss": 0.8624, "step": 38870 }, { "epoch": 0.6825962534454608, "grad_norm": 0.10602625417418367, "learning_rate": 0.0001931740256370561, "loss": 0.8525, "step": 38880 }, { "epoch": 0.6827718183254622, "grad_norm": 0.08029971683443676, "learning_rate": 0.00019316995885664126, "loss": 0.8611, "step": 38890 }, { "epoch": 0.6829473832054636, "grad_norm": 0.07532149671644263, "learning_rate": 0.00019316589090841537, "loss": 0.8571, "step": 38900 }, { "epoch": 0.683122948085465, "grad_norm": 0.0488963992628108, "learning_rate": 0.00019316182179243, "loss": 0.8439, "step": 38910 }, { "epoch": 0.6832985129654664, "grad_norm": 0.04996543073060992, "learning_rate": 0.00019315775150873668, "loss": 0.8519, "step": 38920 }, { "epoch": 0.6834740778454678, "grad_norm": 0.08368173784236026, "learning_rate": 0.00019315368005738698, "loss": 0.8602, "step": 38930 }, { "epoch": 0.6836496427254692, "grad_norm": 0.05969460573442689, "learning_rate": 0.0001931496074384325, "loss": 0.8509, "step": 38940 }, { "epoch": 0.6838252076054706, "grad_norm": 0.06950926342423024, "learning_rate": 0.00019314553365192482, "loss": 0.8543, "step": 38950 }, { "epoch": 0.684000772485472, "grad_norm": 0.05466953021066055, "learning_rate": 0.0001931414586979156, "loss": 0.8604, "step": 38960 }, { "epoch": 0.6841763373654735, "grad_norm": 0.0592583004826709, "learning_rate": 0.0001931373825764564, "loss": 0.8613, "step": 38970 }, { "epoch": 0.6843519022454748, "grad_norm": 0.070511066870787, "learning_rate": 0.00019313330528759892, "loss": 0.8455, "step": 38980 }, { "epoch": 0.6845274671254762, "grad_norm": 0.051798172212695205, "learning_rate": 0.00019312922683139479, "loss": 0.8526, "step": 38990 }, { "epoch": 0.6847030320054777, "grad_norm": 0.07215883328100364, "learning_rate": 0.0001931251472078957, "loss": 0.852, "step": 39000 }, { "epoch": 0.684878596885479, "grad_norm": 0.0618639252225575, "learning_rate": 0.00019312106641715336, "loss": 0.859, "step": 39010 }, { "epoch": 0.6850541617654804, "grad_norm": 0.05551352564423542, "learning_rate": 0.00019311698445921945, "loss": 0.8585, "step": 39020 }, { "epoch": 0.6852297266454819, "grad_norm": 0.058557227476278166, "learning_rate": 0.0001931129013341457, "loss": 0.8508, "step": 39030 }, { "epoch": 0.6854052915254832, "grad_norm": 0.0732357403262482, "learning_rate": 0.00019310881704198384, "loss": 0.8603, "step": 39040 }, { "epoch": 0.6855808564054846, "grad_norm": 0.09603189234764403, "learning_rate": 0.0001931047315827856, "loss": 0.8627, "step": 39050 }, { "epoch": 0.6857564212854861, "grad_norm": 0.05584783677663213, "learning_rate": 0.00019310064495660275, "loss": 0.8556, "step": 39060 }, { "epoch": 0.6859319861654875, "grad_norm": 0.0722215460733453, "learning_rate": 0.0001930965571634871, "loss": 0.8544, "step": 39070 }, { "epoch": 0.6861075510454888, "grad_norm": 0.0515256471561081, "learning_rate": 0.0001930924682034904, "loss": 0.8512, "step": 39080 }, { "epoch": 0.6862831159254903, "grad_norm": 0.06307840612849448, "learning_rate": 0.00019308837807666452, "loss": 0.8527, "step": 39090 }, { "epoch": 0.6864586808054917, "grad_norm": 0.07251188614187588, "learning_rate": 0.0001930842867830612, "loss": 0.8483, "step": 39100 }, { "epoch": 0.686634245685493, "grad_norm": 0.04592357363491942, "learning_rate": 0.00019308019432273232, "loss": 0.8508, "step": 39110 }, { "epoch": 0.6868098105654945, "grad_norm": 0.05842964633874537, "learning_rate": 0.00019307610069572976, "loss": 0.8631, "step": 39120 }, { "epoch": 0.6869853754454959, "grad_norm": 0.0548974517983271, "learning_rate": 0.00019307200590210533, "loss": 0.8468, "step": 39130 }, { "epoch": 0.6871609403254972, "grad_norm": 0.07734569009092715, "learning_rate": 0.00019306790994191096, "loss": 0.8581, "step": 39140 }, { "epoch": 0.6873365052054987, "grad_norm": 0.06702685900146772, "learning_rate": 0.0001930638128151985, "loss": 0.8541, "step": 39150 }, { "epoch": 0.6875120700855001, "grad_norm": 0.06147504787293771, "learning_rate": 0.0001930597145220199, "loss": 0.8593, "step": 39160 }, { "epoch": 0.6876876349655014, "grad_norm": 0.08401141176432524, "learning_rate": 0.00019305561506242707, "loss": 0.8422, "step": 39170 }, { "epoch": 0.6878631998455029, "grad_norm": 0.1019247445622885, "learning_rate": 0.00019305151443647196, "loss": 0.8534, "step": 39180 }, { "epoch": 0.6880387647255043, "grad_norm": 0.08465951681213887, "learning_rate": 0.0001930474126442065, "loss": 0.8588, "step": 39190 }, { "epoch": 0.6882143296055057, "grad_norm": 0.04819082361805634, "learning_rate": 0.00019304330968568273, "loss": 0.8538, "step": 39200 }, { "epoch": 0.6883898944855071, "grad_norm": 0.08349136568384237, "learning_rate": 0.00019303920556095253, "loss": 0.8564, "step": 39210 }, { "epoch": 0.6885654593655085, "grad_norm": 0.0803805851132133, "learning_rate": 0.000193035100270068, "loss": 0.8523, "step": 39220 }, { "epoch": 0.68874102424551, "grad_norm": 0.08243130545606239, "learning_rate": 0.00019303099381308108, "loss": 0.853, "step": 39230 }, { "epoch": 0.6889165891255113, "grad_norm": 0.05699464595008366, "learning_rate": 0.00019302688619004385, "loss": 0.8592, "step": 39240 }, { "epoch": 0.6890921540055127, "grad_norm": 0.058983319507713905, "learning_rate": 0.00019302277740100836, "loss": 0.8509, "step": 39250 }, { "epoch": 0.6892677188855142, "grad_norm": 0.05611598573331356, "learning_rate": 0.0001930186674460266, "loss": 0.852, "step": 39260 }, { "epoch": 0.6894432837655156, "grad_norm": 0.072694381629622, "learning_rate": 0.00019301455632515072, "loss": 0.8529, "step": 39270 }, { "epoch": 0.6896188486455169, "grad_norm": 0.06085516882022055, "learning_rate": 0.00019301044403843273, "loss": 0.8506, "step": 39280 }, { "epoch": 0.6897944135255184, "grad_norm": 0.051872929277386064, "learning_rate": 0.0001930063305859248, "loss": 0.8584, "step": 39290 }, { "epoch": 0.6899699784055198, "grad_norm": 0.06197476955839769, "learning_rate": 0.00019300221596767905, "loss": 0.8544, "step": 39300 }, { "epoch": 0.6901455432855211, "grad_norm": 0.054877418541637614, "learning_rate": 0.0001929981001837476, "loss": 0.8561, "step": 39310 }, { "epoch": 0.6903211081655226, "grad_norm": 0.05333720123494409, "learning_rate": 0.00019299398323418255, "loss": 0.8587, "step": 39320 }, { "epoch": 0.690496673045524, "grad_norm": 0.06290232228870606, "learning_rate": 0.00019298986511903613, "loss": 0.8525, "step": 39330 }, { "epoch": 0.6906722379255253, "grad_norm": 0.0667830221012249, "learning_rate": 0.00019298574583836048, "loss": 0.851, "step": 39340 }, { "epoch": 0.6908478028055268, "grad_norm": 0.07089261200270268, "learning_rate": 0.0001929816253922078, "loss": 0.8575, "step": 39350 }, { "epoch": 0.6910233676855282, "grad_norm": 0.06633403435046449, "learning_rate": 0.0001929775037806303, "loss": 0.859, "step": 39360 }, { "epoch": 0.6911989325655296, "grad_norm": 0.08159110217248701, "learning_rate": 0.00019297338100368022, "loss": 0.8515, "step": 39370 }, { "epoch": 0.691374497445531, "grad_norm": 0.05944260381423131, "learning_rate": 0.00019296925706140973, "loss": 0.848, "step": 39380 }, { "epoch": 0.6915500623255324, "grad_norm": 0.05161463902254573, "learning_rate": 0.00019296513195387116, "loss": 0.8547, "step": 39390 }, { "epoch": 0.6917256272055338, "grad_norm": 0.07701673482884352, "learning_rate": 0.00019296100568111672, "loss": 0.8582, "step": 39400 }, { "epoch": 0.6919011920855352, "grad_norm": 0.08289918849164897, "learning_rate": 0.00019295687824319875, "loss": 0.8509, "step": 39410 }, { "epoch": 0.6920767569655366, "grad_norm": 0.0815473795101243, "learning_rate": 0.00019295274964016947, "loss": 0.8497, "step": 39420 }, { "epoch": 0.692252321845538, "grad_norm": 0.06215755010616076, "learning_rate": 0.0001929486198720813, "loss": 0.8563, "step": 39430 }, { "epoch": 0.6924278867255395, "grad_norm": 0.10291679395820223, "learning_rate": 0.0001929444889389864, "loss": 0.8475, "step": 39440 }, { "epoch": 0.6926034516055408, "grad_norm": 0.090666628887683, "learning_rate": 0.00019294035684093726, "loss": 0.8494, "step": 39450 }, { "epoch": 0.6927790164855422, "grad_norm": 0.07973267476430718, "learning_rate": 0.00019293622357798615, "loss": 0.8572, "step": 39460 }, { "epoch": 0.6929545813655437, "grad_norm": 0.09042869485698973, "learning_rate": 0.00019293208915018549, "loss": 0.8468, "step": 39470 }, { "epoch": 0.693130146245545, "grad_norm": 0.07232202727710102, "learning_rate": 0.00019292795355758765, "loss": 0.8544, "step": 39480 }, { "epoch": 0.6933057111255464, "grad_norm": 0.05852719714458008, "learning_rate": 0.00019292381680024502, "loss": 0.8558, "step": 39490 }, { "epoch": 0.6934812760055479, "grad_norm": 0.08352401576331973, "learning_rate": 0.00019291967887820997, "loss": 0.8591, "step": 39500 }, { "epoch": 0.6936568408855492, "grad_norm": 0.059308469750079064, "learning_rate": 0.000192915539791535, "loss": 0.8533, "step": 39510 }, { "epoch": 0.6938324057655506, "grad_norm": 0.06687425893166533, "learning_rate": 0.00019291139954027254, "loss": 0.8549, "step": 39520 }, { "epoch": 0.6940079706455521, "grad_norm": 0.07771428369904236, "learning_rate": 0.000192907258124475, "loss": 0.849, "step": 39530 }, { "epoch": 0.6941835355255535, "grad_norm": 0.057288094928347084, "learning_rate": 0.00019290311554419492, "loss": 0.856, "step": 39540 }, { "epoch": 0.6943591004055548, "grad_norm": 0.0625256063533773, "learning_rate": 0.00019289897179948474, "loss": 0.848, "step": 39550 }, { "epoch": 0.6945346652855563, "grad_norm": 0.07113092426009918, "learning_rate": 0.00019289482689039696, "loss": 0.8514, "step": 39560 }, { "epoch": 0.6947102301655577, "grad_norm": 0.06180327669124641, "learning_rate": 0.00019289068081698415, "loss": 0.8557, "step": 39570 }, { "epoch": 0.6948857950455591, "grad_norm": 0.06398428514211486, "learning_rate": 0.00019288653357929877, "loss": 0.8529, "step": 39580 }, { "epoch": 0.6950613599255605, "grad_norm": 0.08926975517855078, "learning_rate": 0.0001928823851773934, "loss": 0.8507, "step": 39590 }, { "epoch": 0.6952369248055619, "grad_norm": 0.04769765364785748, "learning_rate": 0.0001928782356113206, "loss": 0.854, "step": 39600 }, { "epoch": 0.6954124896855634, "grad_norm": 0.05617235317217034, "learning_rate": 0.00019287408488113296, "loss": 0.8483, "step": 39610 }, { "epoch": 0.6955880545655647, "grad_norm": 0.06395089548322581, "learning_rate": 0.00019286993298688306, "loss": 0.8588, "step": 39620 }, { "epoch": 0.6957636194455661, "grad_norm": 0.06991060067644503, "learning_rate": 0.0001928657799286235, "loss": 0.8559, "step": 39630 }, { "epoch": 0.6959391843255676, "grad_norm": 0.07208240719484725, "learning_rate": 0.0001928616257064069, "loss": 0.8478, "step": 39640 }, { "epoch": 0.6961147492055689, "grad_norm": 0.0612707381454146, "learning_rate": 0.0001928574703202859, "loss": 0.8499, "step": 39650 }, { "epoch": 0.6962903140855703, "grad_norm": 0.06850549787667702, "learning_rate": 0.00019285331377031314, "loss": 0.8474, "step": 39660 }, { "epoch": 0.6964658789655718, "grad_norm": 0.057437290469697616, "learning_rate": 0.0001928491560565413, "loss": 0.8603, "step": 39670 }, { "epoch": 0.6966414438455731, "grad_norm": 0.07488015789621516, "learning_rate": 0.00019284499717902301, "loss": 0.8505, "step": 39680 }, { "epoch": 0.6968170087255745, "grad_norm": 0.07511974793172084, "learning_rate": 0.00019284083713781103, "loss": 0.853, "step": 39690 }, { "epoch": 0.696992573605576, "grad_norm": 0.10539632835165216, "learning_rate": 0.00019283667593295804, "loss": 0.8548, "step": 39700 }, { "epoch": 0.6971681384855773, "grad_norm": 0.08259317931424316, "learning_rate": 0.0001928325135645168, "loss": 0.8574, "step": 39710 }, { "epoch": 0.6973437033655787, "grad_norm": 0.05960858783279013, "learning_rate": 0.00019282835003253996, "loss": 0.8524, "step": 39720 }, { "epoch": 0.6975192682455802, "grad_norm": 0.0579568054693564, "learning_rate": 0.00019282418533708037, "loss": 0.8576, "step": 39730 }, { "epoch": 0.6976948331255816, "grad_norm": 0.06745398251273503, "learning_rate": 0.00019282001947819072, "loss": 0.859, "step": 39740 }, { "epoch": 0.6978703980055829, "grad_norm": 0.05242194629657755, "learning_rate": 0.00019281585245592387, "loss": 0.8665, "step": 39750 }, { "epoch": 0.6980459628855844, "grad_norm": 0.0686979600673577, "learning_rate": 0.00019281168427033254, "loss": 0.8511, "step": 39760 }, { "epoch": 0.6982215277655858, "grad_norm": 0.07325314077632923, "learning_rate": 0.00019280751492146958, "loss": 0.8423, "step": 39770 }, { "epoch": 0.6983970926455871, "grad_norm": 0.048946136820700134, "learning_rate": 0.0001928033444093878, "loss": 0.8529, "step": 39780 }, { "epoch": 0.6985726575255886, "grad_norm": 0.061469555929228786, "learning_rate": 0.00019279917273414004, "loss": 0.8551, "step": 39790 }, { "epoch": 0.69874822240559, "grad_norm": 0.09575740518183706, "learning_rate": 0.00019279499989577922, "loss": 0.8514, "step": 39800 }, { "epoch": 0.6989237872855913, "grad_norm": 0.06084184412577095, "learning_rate": 0.00019279082589435814, "loss": 0.858, "step": 39810 }, { "epoch": 0.6990993521655928, "grad_norm": 0.044651984636360866, "learning_rate": 0.00019278665072992968, "loss": 0.8623, "step": 39820 }, { "epoch": 0.6992749170455942, "grad_norm": 0.061281215682015444, "learning_rate": 0.00019278247440254677, "loss": 0.8467, "step": 39830 }, { "epoch": 0.6994504819255956, "grad_norm": 0.05270602271536164, "learning_rate": 0.00019277829691226233, "loss": 0.8538, "step": 39840 }, { "epoch": 0.699626046805597, "grad_norm": 0.0626273299082221, "learning_rate": 0.0001927741182591293, "loss": 0.8579, "step": 39850 }, { "epoch": 0.6998016116855984, "grad_norm": 0.06725882936533198, "learning_rate": 0.00019276993844320056, "loss": 0.8531, "step": 39860 }, { "epoch": 0.6999771765655998, "grad_norm": 0.059288342021241086, "learning_rate": 0.00019276575746452913, "loss": 0.8531, "step": 39870 }, { "epoch": 0.7001527414456012, "grad_norm": 0.06770062616155512, "learning_rate": 0.00019276157532316796, "loss": 0.8548, "step": 39880 }, { "epoch": 0.7003283063256026, "grad_norm": 0.06691647403903675, "learning_rate": 0.00019275739201917008, "loss": 0.8491, "step": 39890 }, { "epoch": 0.700503871205604, "grad_norm": 0.08820102267375056, "learning_rate": 0.0001927532075525884, "loss": 0.8547, "step": 39900 }, { "epoch": 0.7006794360856055, "grad_norm": 0.0786645396382141, "learning_rate": 0.00019274902192347606, "loss": 0.8528, "step": 39910 }, { "epoch": 0.7008550009656068, "grad_norm": 0.09000650980579282, "learning_rate": 0.000192744835131886, "loss": 0.8581, "step": 39920 }, { "epoch": 0.7010305658456083, "grad_norm": 0.062014816345771995, "learning_rate": 0.0001927406471778713, "loss": 0.8581, "step": 39930 }, { "epoch": 0.7012061307256097, "grad_norm": 0.08678221446084688, "learning_rate": 0.000192736458061485, "loss": 0.8553, "step": 39940 }, { "epoch": 0.701381695605611, "grad_norm": 0.05676554448753186, "learning_rate": 0.00019273226778278023, "loss": 0.8484, "step": 39950 }, { "epoch": 0.7015572604856125, "grad_norm": 0.05139759513784446, "learning_rate": 0.00019272807634181, "loss": 0.8612, "step": 39960 }, { "epoch": 0.7017328253656139, "grad_norm": 0.06034016056200693, "learning_rate": 0.00019272388373862748, "loss": 0.8588, "step": 39970 }, { "epoch": 0.7019083902456152, "grad_norm": 0.057607044031957794, "learning_rate": 0.00019271968997328578, "loss": 0.8536, "step": 39980 }, { "epoch": 0.7020839551256167, "grad_norm": 0.05030933331519665, "learning_rate": 0.00019271549504583802, "loss": 0.851, "step": 39990 }, { "epoch": 0.7022595200056181, "grad_norm": 0.07551186974064805, "learning_rate": 0.0001927112989563374, "loss": 0.8523, "step": 40000 }, { "epoch": 0.7024350848856195, "grad_norm": 0.06826745389743773, "learning_rate": 0.00019270710170483699, "loss": 0.8498, "step": 40010 }, { "epoch": 0.7026106497656209, "grad_norm": 0.0673760849795274, "learning_rate": 0.00019270290329139006, "loss": 0.8529, "step": 40020 }, { "epoch": 0.7027862146456223, "grad_norm": 0.07066615447766973, "learning_rate": 0.00019269870371604975, "loss": 0.8526, "step": 40030 }, { "epoch": 0.7029617795256237, "grad_norm": 0.06432651616368167, "learning_rate": 0.00019269450297886934, "loss": 0.8541, "step": 40040 }, { "epoch": 0.7031373444056251, "grad_norm": 0.06750360573297502, "learning_rate": 0.00019269030107990195, "loss": 0.8524, "step": 40050 }, { "epoch": 0.7033129092856265, "grad_norm": 0.08005612627467208, "learning_rate": 0.00019268609801920087, "loss": 0.8548, "step": 40060 }, { "epoch": 0.7034884741656279, "grad_norm": 0.0929884158060586, "learning_rate": 0.00019268189379681937, "loss": 0.8573, "step": 40070 }, { "epoch": 0.7036640390456294, "grad_norm": 0.06392614790820661, "learning_rate": 0.0001926776884128107, "loss": 0.8528, "step": 40080 }, { "epoch": 0.7038396039256307, "grad_norm": 0.08612564130215204, "learning_rate": 0.00019267348186722817, "loss": 0.8552, "step": 40090 }, { "epoch": 0.7040151688056321, "grad_norm": 0.07212923760919795, "learning_rate": 0.00019266927416012504, "loss": 0.8483, "step": 40100 }, { "epoch": 0.7041907336856336, "grad_norm": 0.05629359456703132, "learning_rate": 0.00019266506529155465, "loss": 0.8537, "step": 40110 }, { "epoch": 0.7043662985656349, "grad_norm": 0.08417697963957782, "learning_rate": 0.00019266085526157032, "loss": 0.8573, "step": 40120 }, { "epoch": 0.7045418634456363, "grad_norm": 0.07507479331321099, "learning_rate": 0.00019265664407022538, "loss": 0.8575, "step": 40130 }, { "epoch": 0.7047174283256378, "grad_norm": 0.05789201693726613, "learning_rate": 0.00019265243171757317, "loss": 0.8566, "step": 40140 }, { "epoch": 0.7048929932056391, "grad_norm": 0.06895235364978505, "learning_rate": 0.00019264821820366712, "loss": 0.848, "step": 40150 }, { "epoch": 0.7050685580856405, "grad_norm": 0.07878223614944117, "learning_rate": 0.00019264400352856056, "loss": 0.856, "step": 40160 }, { "epoch": 0.705244122965642, "grad_norm": 0.08936892313237853, "learning_rate": 0.0001926397876923069, "loss": 0.8591, "step": 40170 }, { "epoch": 0.7054196878456434, "grad_norm": 0.07582258065535043, "learning_rate": 0.00019263557069495958, "loss": 0.8625, "step": 40180 }, { "epoch": 0.7055952527256447, "grad_norm": 0.056070430770379445, "learning_rate": 0.00019263135253657206, "loss": 0.8545, "step": 40190 }, { "epoch": 0.7057708176056462, "grad_norm": 0.050227775411311015, "learning_rate": 0.00019262713321719767, "loss": 0.8532, "step": 40200 }, { "epoch": 0.7059463824856476, "grad_norm": 0.06359920532283089, "learning_rate": 0.00019262291273688996, "loss": 0.8475, "step": 40210 }, { "epoch": 0.7061219473656489, "grad_norm": 0.05808075924442314, "learning_rate": 0.00019261869109570243, "loss": 0.855, "step": 40220 }, { "epoch": 0.7062975122456504, "grad_norm": 0.07049556074470041, "learning_rate": 0.00019261446829368846, "loss": 0.8569, "step": 40230 }, { "epoch": 0.7064730771256518, "grad_norm": 0.0647314198312937, "learning_rate": 0.0001926102443309017, "loss": 0.8573, "step": 40240 }, { "epoch": 0.7066486420056531, "grad_norm": 0.07371664519931076, "learning_rate": 0.00019260601920739553, "loss": 0.8628, "step": 40250 }, { "epoch": 0.7068242068856546, "grad_norm": 0.056440609135191624, "learning_rate": 0.00019260179292322356, "loss": 0.8604, "step": 40260 }, { "epoch": 0.706999771765656, "grad_norm": 0.0547099350287864, "learning_rate": 0.00019259756547843927, "loss": 0.8625, "step": 40270 }, { "epoch": 0.7071753366456573, "grad_norm": 0.06745431809649541, "learning_rate": 0.00019259333687309636, "loss": 0.8555, "step": 40280 }, { "epoch": 0.7073509015256588, "grad_norm": 0.11555382225101422, "learning_rate": 0.00019258910710724824, "loss": 0.8495, "step": 40290 }, { "epoch": 0.7075264664056602, "grad_norm": 0.060307670027768136, "learning_rate": 0.00019258487618094863, "loss": 0.8572, "step": 40300 }, { "epoch": 0.7077020312856617, "grad_norm": 0.06341198875050888, "learning_rate": 0.00019258064409425108, "loss": 0.8619, "step": 40310 }, { "epoch": 0.707877596165663, "grad_norm": 0.07470955056721337, "learning_rate": 0.0001925764108472092, "loss": 0.846, "step": 40320 }, { "epoch": 0.7080531610456644, "grad_norm": 0.062031412084366175, "learning_rate": 0.00019257217643987664, "loss": 0.8561, "step": 40330 }, { "epoch": 0.7082287259256659, "grad_norm": 0.05020595433020414, "learning_rate": 0.00019256794087230708, "loss": 0.8509, "step": 40340 }, { "epoch": 0.7084042908056672, "grad_norm": 0.056798834610652216, "learning_rate": 0.0001925637041445541, "loss": 0.854, "step": 40350 }, { "epoch": 0.7085798556856686, "grad_norm": 0.05160124986006178, "learning_rate": 0.0001925594662566715, "loss": 0.8539, "step": 40360 }, { "epoch": 0.7087554205656701, "grad_norm": 0.06313353980191602, "learning_rate": 0.0001925552272087129, "loss": 0.8524, "step": 40370 }, { "epoch": 0.7089309854456715, "grad_norm": 0.06994214543913452, "learning_rate": 0.00019255098700073203, "loss": 0.8502, "step": 40380 }, { "epoch": 0.7091065503256728, "grad_norm": 0.08532218635058296, "learning_rate": 0.00019254674563278258, "loss": 0.8554, "step": 40390 }, { "epoch": 0.7092821152056743, "grad_norm": 0.06381129781994219, "learning_rate": 0.00019254250310491838, "loss": 0.8567, "step": 40400 }, { "epoch": 0.7094576800856757, "grad_norm": 0.08768667690618347, "learning_rate": 0.00019253825941719307, "loss": 0.8529, "step": 40410 }, { "epoch": 0.709633244965677, "grad_norm": 0.06199940668579513, "learning_rate": 0.00019253401456966046, "loss": 0.8425, "step": 40420 }, { "epoch": 0.7098088098456785, "grad_norm": 0.07241266129129936, "learning_rate": 0.00019252976856237437, "loss": 0.8552, "step": 40430 }, { "epoch": 0.7099843747256799, "grad_norm": 0.05419095243465862, "learning_rate": 0.0001925255213953886, "loss": 0.8614, "step": 40440 }, { "epoch": 0.7101599396056812, "grad_norm": 0.07158035741887907, "learning_rate": 0.0001925212730687569, "loss": 0.8544, "step": 40450 }, { "epoch": 0.7103355044856827, "grad_norm": 0.06531853207314471, "learning_rate": 0.00019251702358253313, "loss": 0.8512, "step": 40460 }, { "epoch": 0.7105110693656841, "grad_norm": 0.06333540304633178, "learning_rate": 0.00019251277293677114, "loss": 0.856, "step": 40470 }, { "epoch": 0.7106866342456855, "grad_norm": 0.0917594856457596, "learning_rate": 0.00019250852113152474, "loss": 0.8567, "step": 40480 }, { "epoch": 0.7108621991256869, "grad_norm": 0.061034748858780415, "learning_rate": 0.00019250426816684788, "loss": 0.8446, "step": 40490 }, { "epoch": 0.7110377640056883, "grad_norm": 0.07071334088181173, "learning_rate": 0.0001925000140427944, "loss": 0.8589, "step": 40500 }, { "epoch": 0.7112133288856897, "grad_norm": 0.0835492689684211, "learning_rate": 0.00019249575875941822, "loss": 0.8462, "step": 40510 }, { "epoch": 0.7113888937656911, "grad_norm": 0.07838018562660146, "learning_rate": 0.0001924915023167732, "loss": 0.8528, "step": 40520 }, { "epoch": 0.7115644586456925, "grad_norm": 0.08609882896090715, "learning_rate": 0.00019248724471491332, "loss": 0.8494, "step": 40530 }, { "epoch": 0.7117400235256939, "grad_norm": 0.04554676864957363, "learning_rate": 0.00019248298595389254, "loss": 0.8571, "step": 40540 }, { "epoch": 0.7119155884056954, "grad_norm": 0.06619695297987577, "learning_rate": 0.0001924787260337648, "loss": 0.8491, "step": 40550 }, { "epoch": 0.7120911532856967, "grad_norm": 0.0710183811152742, "learning_rate": 0.00019247446495458404, "loss": 0.8494, "step": 40560 }, { "epoch": 0.7122667181656981, "grad_norm": 0.063792567317728, "learning_rate": 0.0001924702027164043, "loss": 0.8583, "step": 40570 }, { "epoch": 0.7124422830456996, "grad_norm": 0.06208690367423301, "learning_rate": 0.00019246593931927954, "loss": 0.8594, "step": 40580 }, { "epoch": 0.7126178479257009, "grad_norm": 0.07543496911794001, "learning_rate": 0.00019246167476326382, "loss": 0.8567, "step": 40590 }, { "epoch": 0.7127934128057023, "grad_norm": 0.05351122906322302, "learning_rate": 0.00019245740904841115, "loss": 0.8578, "step": 40600 }, { "epoch": 0.7129689776857038, "grad_norm": 0.08848229452504475, "learning_rate": 0.00019245314217477557, "loss": 0.8613, "step": 40610 }, { "epoch": 0.7131445425657051, "grad_norm": 0.05357183785745211, "learning_rate": 0.00019244887414241116, "loss": 0.8477, "step": 40620 }, { "epoch": 0.7133201074457065, "grad_norm": 0.07150899206688024, "learning_rate": 0.00019244460495137199, "loss": 0.8555, "step": 40630 }, { "epoch": 0.713495672325708, "grad_norm": 0.05604692932117298, "learning_rate": 0.00019244033460171214, "loss": 0.8466, "step": 40640 }, { "epoch": 0.7136712372057094, "grad_norm": 0.07155515810246776, "learning_rate": 0.00019243606309348577, "loss": 0.8503, "step": 40650 }, { "epoch": 0.7138468020857108, "grad_norm": 0.0598349392503002, "learning_rate": 0.0001924317904267469, "loss": 0.8503, "step": 40660 }, { "epoch": 0.7140223669657122, "grad_norm": 0.06432666955176702, "learning_rate": 0.0001924275166015497, "loss": 0.852, "step": 40670 }, { "epoch": 0.7141979318457136, "grad_norm": 0.057687519786230414, "learning_rate": 0.0001924232416179484, "loss": 0.8589, "step": 40680 }, { "epoch": 0.714373496725715, "grad_norm": 0.07593613338250556, "learning_rate": 0.0001924189654759971, "loss": 0.8557, "step": 40690 }, { "epoch": 0.7145490616057164, "grad_norm": 0.07712329896602356, "learning_rate": 0.00019241468817574996, "loss": 0.8536, "step": 40700 }, { "epoch": 0.7147246264857178, "grad_norm": 0.06960471940631713, "learning_rate": 0.00019241040971726124, "loss": 0.8495, "step": 40710 }, { "epoch": 0.7149001913657193, "grad_norm": 0.07526239216597744, "learning_rate": 0.0001924061301005851, "loss": 0.8497, "step": 40720 }, { "epoch": 0.7150757562457206, "grad_norm": 0.06826442429987162, "learning_rate": 0.00019240184932577576, "loss": 0.8549, "step": 40730 }, { "epoch": 0.715251321125722, "grad_norm": 0.09228528575022618, "learning_rate": 0.00019239756739288747, "loss": 0.8491, "step": 40740 }, { "epoch": 0.7154268860057235, "grad_norm": 0.059673747044249506, "learning_rate": 0.00019239328430197453, "loss": 0.8515, "step": 40750 }, { "epoch": 0.7156024508857248, "grad_norm": 0.10302134193070571, "learning_rate": 0.00019238900005309108, "loss": 0.8573, "step": 40760 }, { "epoch": 0.7157780157657262, "grad_norm": 0.06531163917460162, "learning_rate": 0.00019238471464629153, "loss": 0.8586, "step": 40770 }, { "epoch": 0.7159535806457277, "grad_norm": 0.05003231203496736, "learning_rate": 0.00019238042808163015, "loss": 0.8616, "step": 40780 }, { "epoch": 0.716129145525729, "grad_norm": 0.049590707256350126, "learning_rate": 0.00019237614035916119, "loss": 0.8572, "step": 40790 }, { "epoch": 0.7163047104057304, "grad_norm": 0.07198723891074335, "learning_rate": 0.00019237185147893905, "loss": 0.853, "step": 40800 }, { "epoch": 0.7164802752857319, "grad_norm": 0.05901154553512088, "learning_rate": 0.00019236756144101804, "loss": 0.8504, "step": 40810 }, { "epoch": 0.7166558401657332, "grad_norm": 0.06520520898641255, "learning_rate": 0.0001923632702454525, "loss": 0.8557, "step": 40820 }, { "epoch": 0.7168314050457346, "grad_norm": 0.09378214467176965, "learning_rate": 0.00019235897789229682, "loss": 0.8527, "step": 40830 }, { "epoch": 0.7170069699257361, "grad_norm": 0.06945690130699264, "learning_rate": 0.0001923546843816054, "loss": 0.8543, "step": 40840 }, { "epoch": 0.7171825348057375, "grad_norm": 0.07247302897478629, "learning_rate": 0.0001923503897134326, "loss": 0.8492, "step": 40850 }, { "epoch": 0.7173580996857388, "grad_norm": 0.06530772636123368, "learning_rate": 0.00019234609388783287, "loss": 0.8479, "step": 40860 }, { "epoch": 0.7175336645657403, "grad_norm": 0.055463666140670026, "learning_rate": 0.00019234179690486063, "loss": 0.8571, "step": 40870 }, { "epoch": 0.7177092294457417, "grad_norm": 0.06815727315668724, "learning_rate": 0.0001923374987645703, "loss": 0.8595, "step": 40880 }, { "epoch": 0.717884794325743, "grad_norm": 0.07188126822142968, "learning_rate": 0.00019233319946701638, "loss": 0.8629, "step": 40890 }, { "epoch": 0.7180603592057445, "grad_norm": 0.08179265584577546, "learning_rate": 0.00019232889901225325, "loss": 0.8444, "step": 40900 }, { "epoch": 0.7182359240857459, "grad_norm": 0.056096007811202275, "learning_rate": 0.00019232459740033554, "loss": 0.8606, "step": 40910 }, { "epoch": 0.7184114889657472, "grad_norm": 0.07064148818590282, "learning_rate": 0.00019232029463131766, "loss": 0.8517, "step": 40920 }, { "epoch": 0.7185870538457487, "grad_norm": 0.062401529905027814, "learning_rate": 0.00019231599070525414, "loss": 0.8565, "step": 40930 }, { "epoch": 0.7187626187257501, "grad_norm": 0.11392660224733912, "learning_rate": 0.0001923116856221995, "loss": 0.858, "step": 40940 }, { "epoch": 0.7189381836057515, "grad_norm": 0.0650536833659954, "learning_rate": 0.00019230737938220833, "loss": 0.8576, "step": 40950 }, { "epoch": 0.7191137484857529, "grad_norm": 0.07877526642704848, "learning_rate": 0.00019230307198533518, "loss": 0.8559, "step": 40960 }, { "epoch": 0.7192893133657543, "grad_norm": 0.08064863234330585, "learning_rate": 0.00019229876343163457, "loss": 0.8567, "step": 40970 }, { "epoch": 0.7194648782457557, "grad_norm": 0.0716722190696521, "learning_rate": 0.00019229445372116112, "loss": 0.8481, "step": 40980 }, { "epoch": 0.7196404431257571, "grad_norm": 0.05217606874592304, "learning_rate": 0.0001922901428539695, "loss": 0.8601, "step": 40990 }, { "epoch": 0.7198160080057585, "grad_norm": 0.08972730239930479, "learning_rate": 0.00019228583083011423, "loss": 0.8574, "step": 41000 }, { "epoch": 0.7199915728857599, "grad_norm": 0.055185017664135735, "learning_rate": 0.00019228151764965, "loss": 0.8563, "step": 41010 }, { "epoch": 0.7201671377657614, "grad_norm": 0.0642685641758122, "learning_rate": 0.00019227720331263146, "loss": 0.8543, "step": 41020 }, { "epoch": 0.7203427026457627, "grad_norm": 0.07126838313131871, "learning_rate": 0.00019227288781911323, "loss": 0.8576, "step": 41030 }, { "epoch": 0.7205182675257642, "grad_norm": 0.06266225332822965, "learning_rate": 0.00019226857116915006, "loss": 0.855, "step": 41040 }, { "epoch": 0.7206938324057656, "grad_norm": 0.05051464356879038, "learning_rate": 0.00019226425336279658, "loss": 0.86, "step": 41050 }, { "epoch": 0.7208693972857669, "grad_norm": 0.06850049425284685, "learning_rate": 0.0001922599344001075, "loss": 0.8493, "step": 41060 }, { "epoch": 0.7210449621657684, "grad_norm": 0.05414267756963482, "learning_rate": 0.00019225561428113758, "loss": 0.852, "step": 41070 }, { "epoch": 0.7212205270457698, "grad_norm": 0.09544593383682366, "learning_rate": 0.00019225129300594156, "loss": 0.8558, "step": 41080 }, { "epoch": 0.7213960919257711, "grad_norm": 0.06333672098790791, "learning_rate": 0.00019224697057457414, "loss": 0.8552, "step": 41090 }, { "epoch": 0.7215716568057726, "grad_norm": 0.0771624506047455, "learning_rate": 0.00019224264698709012, "loss": 0.8623, "step": 41100 }, { "epoch": 0.721747221685774, "grad_norm": 0.0735347773793183, "learning_rate": 0.00019223832224354428, "loss": 0.8481, "step": 41110 }, { "epoch": 0.7219227865657754, "grad_norm": 0.07847556874922292, "learning_rate": 0.00019223399634399143, "loss": 0.8569, "step": 41120 }, { "epoch": 0.7220983514457768, "grad_norm": 0.05263539827215551, "learning_rate": 0.00019222966928848635, "loss": 0.8531, "step": 41130 }, { "epoch": 0.7222739163257782, "grad_norm": 0.07260781811270274, "learning_rate": 0.00019222534107708386, "loss": 0.8536, "step": 41140 }, { "epoch": 0.7224494812057796, "grad_norm": 0.057075316471987825, "learning_rate": 0.00019222101170983882, "loss": 0.8535, "step": 41150 }, { "epoch": 0.722625046085781, "grad_norm": 0.07235300750646124, "learning_rate": 0.0001922166811868061, "loss": 0.8577, "step": 41160 }, { "epoch": 0.7228006109657824, "grad_norm": 0.055625380845334686, "learning_rate": 0.0001922123495080405, "loss": 0.8577, "step": 41170 }, { "epoch": 0.7229761758457838, "grad_norm": 0.062913286099711, "learning_rate": 0.000192208016673597, "loss": 0.8559, "step": 41180 }, { "epoch": 0.7231517407257853, "grad_norm": 0.08174737430690163, "learning_rate": 0.00019220368268353042, "loss": 0.846, "step": 41190 }, { "epoch": 0.7233273056057866, "grad_norm": 0.0478226750514807, "learning_rate": 0.0001921993475378957, "loss": 0.8535, "step": 41200 }, { "epoch": 0.723502870485788, "grad_norm": 0.06437284047010983, "learning_rate": 0.0001921950112367478, "loss": 0.8538, "step": 41210 }, { "epoch": 0.7236784353657895, "grad_norm": 0.07098144902359291, "learning_rate": 0.00019219067378014159, "loss": 0.8572, "step": 41220 }, { "epoch": 0.7238540002457908, "grad_norm": 0.09159410948177986, "learning_rate": 0.0001921863351681321, "loss": 0.8501, "step": 41230 }, { "epoch": 0.7240295651257922, "grad_norm": 0.0732180444619823, "learning_rate": 0.00019218199540077424, "loss": 0.8488, "step": 41240 }, { "epoch": 0.7242051300057937, "grad_norm": 0.06961050928183718, "learning_rate": 0.00019217765447812306, "loss": 0.8545, "step": 41250 }, { "epoch": 0.724380694885795, "grad_norm": 0.05305235105744925, "learning_rate": 0.0001921733124002335, "loss": 0.8592, "step": 41260 }, { "epoch": 0.7245562597657964, "grad_norm": 0.06442110220951865, "learning_rate": 0.00019216896916716058, "loss": 0.8618, "step": 41270 }, { "epoch": 0.7247318246457979, "grad_norm": 0.06681188729386295, "learning_rate": 0.0001921646247789594, "loss": 0.8425, "step": 41280 }, { "epoch": 0.7249073895257993, "grad_norm": 0.0988456661833109, "learning_rate": 0.00019216027923568493, "loss": 0.8596, "step": 41290 }, { "epoch": 0.7250829544058006, "grad_norm": 0.053590953122201794, "learning_rate": 0.0001921559325373922, "loss": 0.8567, "step": 41300 }, { "epoch": 0.7252585192858021, "grad_norm": 0.07281803549322106, "learning_rate": 0.0001921515846841364, "loss": 0.8586, "step": 41310 }, { "epoch": 0.7254340841658035, "grad_norm": 0.08233978364631804, "learning_rate": 0.00019214723567597254, "loss": 0.8605, "step": 41320 }, { "epoch": 0.7256096490458048, "grad_norm": 0.055531399492499973, "learning_rate": 0.0001921428855129557, "loss": 0.8605, "step": 41330 }, { "epoch": 0.7257852139258063, "grad_norm": 0.07327506071356443, "learning_rate": 0.0001921385341951411, "loss": 0.8567, "step": 41340 }, { "epoch": 0.7259607788058077, "grad_norm": 0.07579730737062462, "learning_rate": 0.00019213418172258373, "loss": 0.8542, "step": 41350 }, { "epoch": 0.726136343685809, "grad_norm": 0.059236666219940504, "learning_rate": 0.00019212982809533886, "loss": 0.8488, "step": 41360 }, { "epoch": 0.7263119085658105, "grad_norm": 0.07074983006810254, "learning_rate": 0.00019212547331346158, "loss": 0.8611, "step": 41370 }, { "epoch": 0.7264874734458119, "grad_norm": 0.0614739908076061, "learning_rate": 0.0001921211173770071, "loss": 0.8484, "step": 41380 }, { "epoch": 0.7266630383258134, "grad_norm": 0.07467274784910788, "learning_rate": 0.00019211676028603057, "loss": 0.8515, "step": 41390 }, { "epoch": 0.7268386032058147, "grad_norm": 0.06102583700383781, "learning_rate": 0.0001921124020405873, "loss": 0.8566, "step": 41400 }, { "epoch": 0.7270141680858161, "grad_norm": 0.05548945233312262, "learning_rate": 0.00019210804264073236, "loss": 0.8489, "step": 41410 }, { "epoch": 0.7271897329658176, "grad_norm": 0.06884314092630858, "learning_rate": 0.0001921036820865211, "loss": 0.8539, "step": 41420 }, { "epoch": 0.7273652978458189, "grad_norm": 0.06681588307556562, "learning_rate": 0.00019209932037800873, "loss": 0.8409, "step": 41430 }, { "epoch": 0.7275408627258203, "grad_norm": 0.07018868445921589, "learning_rate": 0.00019209495751525049, "loss": 0.8511, "step": 41440 }, { "epoch": 0.7277164276058218, "grad_norm": 0.08883011130974354, "learning_rate": 0.00019209059349830168, "loss": 0.8498, "step": 41450 }, { "epoch": 0.7278919924858231, "grad_norm": 0.05860884524557781, "learning_rate": 0.0001920862283272176, "loss": 0.8467, "step": 41460 }, { "epoch": 0.7280675573658245, "grad_norm": 0.062154192089645, "learning_rate": 0.00019208186200205354, "loss": 0.8534, "step": 41470 }, { "epoch": 0.728243122245826, "grad_norm": 0.06918745644328879, "learning_rate": 0.00019207749452286485, "loss": 0.8536, "step": 41480 }, { "epoch": 0.7284186871258274, "grad_norm": 0.06987809098147872, "learning_rate": 0.00019207312588970682, "loss": 0.8597, "step": 41490 }, { "epoch": 0.7285942520058287, "grad_norm": 0.06648024988295402, "learning_rate": 0.00019206875610263487, "loss": 0.8563, "step": 41500 }, { "epoch": 0.7287698168858302, "grad_norm": 0.06054428865001836, "learning_rate": 0.0001920643851617043, "loss": 0.8532, "step": 41510 }, { "epoch": 0.7289453817658316, "grad_norm": 0.08572841857322734, "learning_rate": 0.00019206001306697055, "loss": 0.8492, "step": 41520 }, { "epoch": 0.7291209466458329, "grad_norm": 0.06637976285418597, "learning_rate": 0.00019205563981848894, "loss": 0.8484, "step": 41530 }, { "epoch": 0.7292965115258344, "grad_norm": 0.08718326242308899, "learning_rate": 0.00019205126541631495, "loss": 0.8542, "step": 41540 }, { "epoch": 0.7294720764058358, "grad_norm": 0.06533664097368844, "learning_rate": 0.00019204688986050397, "loss": 0.8467, "step": 41550 }, { "epoch": 0.7296476412858371, "grad_norm": 0.06692804737566767, "learning_rate": 0.00019204251315111144, "loss": 0.8493, "step": 41560 }, { "epoch": 0.7298232061658386, "grad_norm": 0.06669041222204283, "learning_rate": 0.00019203813528819282, "loss": 0.8515, "step": 41570 }, { "epoch": 0.72999877104584, "grad_norm": 0.0667100629163934, "learning_rate": 0.00019203375627180357, "loss": 0.8568, "step": 41580 }, { "epoch": 0.7301743359258414, "grad_norm": 0.07918849344008952, "learning_rate": 0.00019202937610199924, "loss": 0.8514, "step": 41590 }, { "epoch": 0.7303499008058428, "grad_norm": 0.08090038972082882, "learning_rate": 0.0001920249947788352, "loss": 0.8575, "step": 41600 }, { "epoch": 0.7305254656858442, "grad_norm": 0.05902481529888745, "learning_rate": 0.00019202061230236706, "loss": 0.8541, "step": 41610 }, { "epoch": 0.7307010305658456, "grad_norm": 0.06554218887731347, "learning_rate": 0.00019201622867265033, "loss": 0.8432, "step": 41620 }, { "epoch": 0.730876595445847, "grad_norm": 0.05109700214498215, "learning_rate": 0.0001920118438897405, "loss": 0.8579, "step": 41630 }, { "epoch": 0.7310521603258484, "grad_norm": 0.0870847113093831, "learning_rate": 0.0001920074579536932, "loss": 0.855, "step": 41640 }, { "epoch": 0.7312277252058498, "grad_norm": 0.06672960728942325, "learning_rate": 0.00019200307086456392, "loss": 0.8517, "step": 41650 }, { "epoch": 0.7314032900858513, "grad_norm": 0.06963570618095959, "learning_rate": 0.00019199868262240836, "loss": 0.8569, "step": 41660 }, { "epoch": 0.7315788549658526, "grad_norm": 0.0604864717252325, "learning_rate": 0.00019199429322728202, "loss": 0.8529, "step": 41670 }, { "epoch": 0.731754419845854, "grad_norm": 0.07074809218564695, "learning_rate": 0.00019198990267924054, "loss": 0.8522, "step": 41680 }, { "epoch": 0.7319299847258555, "grad_norm": 0.07741907626664224, "learning_rate": 0.00019198551097833954, "loss": 0.8553, "step": 41690 }, { "epoch": 0.7321055496058568, "grad_norm": 0.07156291399585768, "learning_rate": 0.0001919811181246347, "loss": 0.8523, "step": 41700 }, { "epoch": 0.7322811144858582, "grad_norm": 0.05199088369172314, "learning_rate": 0.00019197672411818168, "loss": 0.8517, "step": 41710 }, { "epoch": 0.7324566793658597, "grad_norm": 0.07410704115284417, "learning_rate": 0.0001919723289590361, "loss": 0.8513, "step": 41720 }, { "epoch": 0.732632244245861, "grad_norm": 0.06347352899849315, "learning_rate": 0.0001919679326472537, "loss": 0.8587, "step": 41730 }, { "epoch": 0.7328078091258624, "grad_norm": 0.05807932068617638, "learning_rate": 0.00019196353518289013, "loss": 0.8533, "step": 41740 }, { "epoch": 0.7329833740058639, "grad_norm": 0.0706125815882134, "learning_rate": 0.00019195913656600114, "loss": 0.8676, "step": 41750 }, { "epoch": 0.7331589388858653, "grad_norm": 0.054199879633621544, "learning_rate": 0.00019195473679664245, "loss": 0.855, "step": 41760 }, { "epoch": 0.7333345037658667, "grad_norm": 0.07083921303970052, "learning_rate": 0.00019195033587486986, "loss": 0.856, "step": 41770 }, { "epoch": 0.7335100686458681, "grad_norm": 0.06807940444850266, "learning_rate": 0.00019194593380073905, "loss": 0.8524, "step": 41780 }, { "epoch": 0.7336856335258695, "grad_norm": 0.06582011766936863, "learning_rate": 0.00019194153057430584, "loss": 0.8515, "step": 41790 }, { "epoch": 0.7338611984058709, "grad_norm": 0.06458613831609501, "learning_rate": 0.00019193712619562599, "loss": 0.8534, "step": 41800 }, { "epoch": 0.7340367632858723, "grad_norm": 0.0710525368071661, "learning_rate": 0.00019193272066475535, "loss": 0.8547, "step": 41810 }, { "epoch": 0.7342123281658737, "grad_norm": 0.06533630147388196, "learning_rate": 0.00019192831398174965, "loss": 0.8525, "step": 41820 }, { "epoch": 0.7343878930458752, "grad_norm": 0.05766203568208778, "learning_rate": 0.00019192390614666485, "loss": 0.8569, "step": 41830 }, { "epoch": 0.7345634579258765, "grad_norm": 0.05928881481505614, "learning_rate": 0.00019191949715955672, "loss": 0.8584, "step": 41840 }, { "epoch": 0.7347390228058779, "grad_norm": 0.08220713814183263, "learning_rate": 0.00019191508702048112, "loss": 0.8479, "step": 41850 }, { "epoch": 0.7349145876858794, "grad_norm": 0.06903736561967637, "learning_rate": 0.00019191067572949398, "loss": 0.8462, "step": 41860 }, { "epoch": 0.7350901525658807, "grad_norm": 0.05535131479150352, "learning_rate": 0.00019190626328665112, "loss": 0.8499, "step": 41870 }, { "epoch": 0.7352657174458821, "grad_norm": 0.04418016080274009, "learning_rate": 0.00019190184969200848, "loss": 0.8482, "step": 41880 }, { "epoch": 0.7354412823258836, "grad_norm": 0.055365908014858176, "learning_rate": 0.000191897434945622, "loss": 0.853, "step": 41890 }, { "epoch": 0.7356168472058849, "grad_norm": 0.05648825865455968, "learning_rate": 0.00019189301904754758, "loss": 0.8432, "step": 41900 }, { "epoch": 0.7357924120858863, "grad_norm": 0.06141361672770195, "learning_rate": 0.00019188860199784123, "loss": 0.8439, "step": 41910 }, { "epoch": 0.7359679769658878, "grad_norm": 0.05237978612837326, "learning_rate": 0.00019188418379655883, "loss": 0.852, "step": 41920 }, { "epoch": 0.7361435418458891, "grad_norm": 0.06884684080682875, "learning_rate": 0.00019187976444375645, "loss": 0.8559, "step": 41930 }, { "epoch": 0.7363191067258905, "grad_norm": 0.06627078358003316, "learning_rate": 0.00019187534393949, "loss": 0.849, "step": 41940 }, { "epoch": 0.736494671605892, "grad_norm": 0.05516337570345281, "learning_rate": 0.00019187092228381553, "loss": 0.8541, "step": 41950 }, { "epoch": 0.7366702364858934, "grad_norm": 0.05411168139049187, "learning_rate": 0.00019186649947678907, "loss": 0.8534, "step": 41960 }, { "epoch": 0.7368458013658947, "grad_norm": 0.05030790422918472, "learning_rate": 0.00019186207551846664, "loss": 0.8491, "step": 41970 }, { "epoch": 0.7370213662458962, "grad_norm": 0.08649242063828952, "learning_rate": 0.0001918576504089043, "loss": 0.8517, "step": 41980 }, { "epoch": 0.7371969311258976, "grad_norm": 0.07146140215788527, "learning_rate": 0.0001918532241481581, "loss": 0.8518, "step": 41990 }, { "epoch": 0.7373724960058989, "grad_norm": 0.06318181150594072, "learning_rate": 0.00019184879673628415, "loss": 0.8495, "step": 42000 }, { "epoch": 0.7375480608859004, "grad_norm": 0.04911692271438476, "learning_rate": 0.00019184436817333854, "loss": 0.8478, "step": 42010 }, { "epoch": 0.7377236257659018, "grad_norm": 0.0743884540788653, "learning_rate": 0.0001918399384593774, "loss": 0.8518, "step": 42020 }, { "epoch": 0.7378991906459031, "grad_norm": 0.06131054850299415, "learning_rate": 0.00019183550759445682, "loss": 0.8494, "step": 42030 }, { "epoch": 0.7380747555259046, "grad_norm": 0.07204146136591467, "learning_rate": 0.00019183107557863292, "loss": 0.8526, "step": 42040 }, { "epoch": 0.738250320405906, "grad_norm": 0.0770639771637805, "learning_rate": 0.0001918266424119619, "loss": 0.8573, "step": 42050 }, { "epoch": 0.7384258852859074, "grad_norm": 0.056213196354656984, "learning_rate": 0.00019182220809449988, "loss": 0.8499, "step": 42060 }, { "epoch": 0.7386014501659088, "grad_norm": 0.07993380854865577, "learning_rate": 0.00019181777262630313, "loss": 0.8466, "step": 42070 }, { "epoch": 0.7387770150459102, "grad_norm": 0.06349126329786077, "learning_rate": 0.0001918133360074278, "loss": 0.8529, "step": 42080 }, { "epoch": 0.7389525799259116, "grad_norm": 0.05431233989208571, "learning_rate": 0.00019180889823793004, "loss": 0.8529, "step": 42090 }, { "epoch": 0.739128144805913, "grad_norm": 0.06015516864128651, "learning_rate": 0.00019180445931786617, "loss": 0.8455, "step": 42100 }, { "epoch": 0.7393037096859144, "grad_norm": 0.06118978558607309, "learning_rate": 0.0001918000192472924, "loss": 0.8612, "step": 42110 }, { "epoch": 0.7394792745659159, "grad_norm": 0.05491308317056076, "learning_rate": 0.000191795578026265, "loss": 0.8474, "step": 42120 }, { "epoch": 0.7396548394459173, "grad_norm": 0.05324015440791035, "learning_rate": 0.0001917911356548402, "loss": 0.847, "step": 42130 }, { "epoch": 0.7398304043259186, "grad_norm": 0.06510685189528716, "learning_rate": 0.00019178669213307432, "loss": 0.8558, "step": 42140 }, { "epoch": 0.7400059692059201, "grad_norm": 0.07212088995340919, "learning_rate": 0.00019178224746102364, "loss": 0.8496, "step": 42150 }, { "epoch": 0.7401815340859215, "grad_norm": 0.06690921465272831, "learning_rate": 0.00019177780163874452, "loss": 0.8613, "step": 42160 }, { "epoch": 0.7403570989659228, "grad_norm": 0.07419316223395012, "learning_rate": 0.00019177335466629322, "loss": 0.8554, "step": 42170 }, { "epoch": 0.7405326638459243, "grad_norm": 0.06705817201824564, "learning_rate": 0.0001917689065437261, "loss": 0.8562, "step": 42180 }, { "epoch": 0.7407082287259257, "grad_norm": 0.07961908843048297, "learning_rate": 0.00019176445727109962, "loss": 0.8523, "step": 42190 }, { "epoch": 0.740883793605927, "grad_norm": 0.0613690049009976, "learning_rate": 0.00019176000684847003, "loss": 0.8605, "step": 42200 }, { "epoch": 0.7410593584859285, "grad_norm": 0.05431219176933614, "learning_rate": 0.00019175555527589376, "loss": 0.8548, "step": 42210 }, { "epoch": 0.7412349233659299, "grad_norm": 0.07304118623601136, "learning_rate": 0.0001917511025534272, "loss": 0.8641, "step": 42220 }, { "epoch": 0.7414104882459313, "grad_norm": 0.07200687431119446, "learning_rate": 0.00019174664868112676, "loss": 0.8611, "step": 42230 }, { "epoch": 0.7415860531259327, "grad_norm": 0.05667991305945278, "learning_rate": 0.0001917421936590489, "loss": 0.8539, "step": 42240 }, { "epoch": 0.7417616180059341, "grad_norm": 0.05218953950687969, "learning_rate": 0.00019173773748725007, "loss": 0.853, "step": 42250 }, { "epoch": 0.7419371828859355, "grad_norm": 0.05464323829602822, "learning_rate": 0.00019173328016578673, "loss": 0.855, "step": 42260 }, { "epoch": 0.7421127477659369, "grad_norm": 0.14908557751885032, "learning_rate": 0.0001917288216947153, "loss": 0.8526, "step": 42270 }, { "epoch": 0.7422883126459383, "grad_norm": 0.04655072479145387, "learning_rate": 0.00019172436207409236, "loss": 0.8515, "step": 42280 }, { "epoch": 0.7424638775259397, "grad_norm": 0.09146094248614521, "learning_rate": 0.00019171990130397435, "loss": 0.8464, "step": 42290 }, { "epoch": 0.7426394424059412, "grad_norm": 0.06827389521718434, "learning_rate": 0.00019171543938441778, "loss": 0.8582, "step": 42300 }, { "epoch": 0.7428150072859425, "grad_norm": 0.04824347520185968, "learning_rate": 0.00019171097631547922, "loss": 0.8467, "step": 42310 }, { "epoch": 0.7429905721659439, "grad_norm": 0.08095592792119338, "learning_rate": 0.0001917065120972152, "loss": 0.8534, "step": 42320 }, { "epoch": 0.7431661370459454, "grad_norm": 0.058717642133453464, "learning_rate": 0.0001917020467296823, "loss": 0.8535, "step": 42330 }, { "epoch": 0.7433417019259467, "grad_norm": 0.060189738147158875, "learning_rate": 0.00019169758021293708, "loss": 0.859, "step": 42340 }, { "epoch": 0.7435172668059481, "grad_norm": 0.08681181231403931, "learning_rate": 0.00019169311254703614, "loss": 0.854, "step": 42350 }, { "epoch": 0.7436928316859496, "grad_norm": 0.06436478244627117, "learning_rate": 0.0001916886437320361, "loss": 0.8537, "step": 42360 }, { "epoch": 0.7438683965659509, "grad_norm": 0.057444850744712325, "learning_rate": 0.00019168417376799353, "loss": 0.8603, "step": 42370 }, { "epoch": 0.7440439614459523, "grad_norm": 0.061900612484627714, "learning_rate": 0.0001916797026549651, "loss": 0.8445, "step": 42380 }, { "epoch": 0.7442195263259538, "grad_norm": 0.06594312150000915, "learning_rate": 0.00019167523039300747, "loss": 0.8583, "step": 42390 }, { "epoch": 0.7443950912059552, "grad_norm": 0.061792541172385064, "learning_rate": 0.00019167075698217729, "loss": 0.8567, "step": 42400 }, { "epoch": 0.7445706560859565, "grad_norm": 0.08174376091114989, "learning_rate": 0.00019166628242253128, "loss": 0.8536, "step": 42410 }, { "epoch": 0.744746220965958, "grad_norm": 0.05322973512844875, "learning_rate": 0.00019166180671412606, "loss": 0.8528, "step": 42420 }, { "epoch": 0.7449217858459594, "grad_norm": 0.0697307552313755, "learning_rate": 0.00019165732985701838, "loss": 0.8487, "step": 42430 }, { "epoch": 0.7450973507259607, "grad_norm": 0.06935201258294037, "learning_rate": 0.0001916528518512649, "loss": 0.8502, "step": 42440 }, { "epoch": 0.7452729156059622, "grad_norm": 0.06823494772913313, "learning_rate": 0.0001916483726969225, "loss": 0.8485, "step": 42450 }, { "epoch": 0.7454484804859636, "grad_norm": 0.06359380951137288, "learning_rate": 0.00019164389239404784, "loss": 0.8535, "step": 42460 }, { "epoch": 0.7456240453659649, "grad_norm": 0.06571564164702312, "learning_rate": 0.00019163941094269764, "loss": 0.8476, "step": 42470 }, { "epoch": 0.7457996102459664, "grad_norm": 0.05341484680142433, "learning_rate": 0.00019163492834292875, "loss": 0.8551, "step": 42480 }, { "epoch": 0.7459751751259678, "grad_norm": 0.07029233566397124, "learning_rate": 0.00019163044459479794, "loss": 0.8584, "step": 42490 }, { "epoch": 0.7461507400059693, "grad_norm": 0.05802606678474267, "learning_rate": 0.00019162595969836205, "loss": 0.8553, "step": 42500 }, { "epoch": 0.7463263048859706, "grad_norm": 0.05695427603705178, "learning_rate": 0.00019162147365367786, "loss": 0.845, "step": 42510 }, { "epoch": 0.746501869765972, "grad_norm": 0.07250464756465212, "learning_rate": 0.00019161698646080226, "loss": 0.8487, "step": 42520 }, { "epoch": 0.7466774346459735, "grad_norm": 0.06152934063463218, "learning_rate": 0.00019161249811979207, "loss": 0.8547, "step": 42530 }, { "epoch": 0.7468529995259748, "grad_norm": 0.07044498816886662, "learning_rate": 0.00019160800863070413, "loss": 0.8545, "step": 42540 }, { "epoch": 0.7470285644059762, "grad_norm": 0.0974201836219756, "learning_rate": 0.00019160351799359538, "loss": 0.8491, "step": 42550 }, { "epoch": 0.7472041292859777, "grad_norm": 0.048148701164463764, "learning_rate": 0.0001915990262085227, "loss": 0.8552, "step": 42560 }, { "epoch": 0.747379694165979, "grad_norm": 0.06354992156007205, "learning_rate": 0.00019159453327554298, "loss": 0.8574, "step": 42570 }, { "epoch": 0.7475552590459804, "grad_norm": 0.05283883831345714, "learning_rate": 0.00019159003919471318, "loss": 0.8554, "step": 42580 }, { "epoch": 0.7477308239259819, "grad_norm": 0.05041263254292258, "learning_rate": 0.00019158554396609022, "loss": 0.8494, "step": 42590 }, { "epoch": 0.7479063888059833, "grad_norm": 0.06162119155343058, "learning_rate": 0.00019158104758973103, "loss": 0.8635, "step": 42600 }, { "epoch": 0.7480819536859846, "grad_norm": 0.06816861916329131, "learning_rate": 0.00019157655006569262, "loss": 0.852, "step": 42610 }, { "epoch": 0.7482575185659861, "grad_norm": 0.06504041313408275, "learning_rate": 0.00019157205139403195, "loss": 0.8509, "step": 42620 }, { "epoch": 0.7484330834459875, "grad_norm": 0.06753825041295056, "learning_rate": 0.00019156755157480603, "loss": 0.8505, "step": 42630 }, { "epoch": 0.7486086483259888, "grad_norm": 0.06268828063413207, "learning_rate": 0.00019156305060807191, "loss": 0.8539, "step": 42640 }, { "epoch": 0.7487842132059903, "grad_norm": 0.06372893977689925, "learning_rate": 0.00019155854849388658, "loss": 0.852, "step": 42650 }, { "epoch": 0.7489597780859917, "grad_norm": 0.06041978434754767, "learning_rate": 0.00019155404523230705, "loss": 0.8501, "step": 42660 }, { "epoch": 0.749135342965993, "grad_norm": 0.0604424298778141, "learning_rate": 0.00019154954082339042, "loss": 0.8566, "step": 42670 }, { "epoch": 0.7493109078459945, "grad_norm": 0.059110150962359775, "learning_rate": 0.00019154503526719378, "loss": 0.8575, "step": 42680 }, { "epoch": 0.7494864727259959, "grad_norm": 0.060011944583619185, "learning_rate": 0.00019154052856377418, "loss": 0.8436, "step": 42690 }, { "epoch": 0.7496620376059973, "grad_norm": 0.05819662913692565, "learning_rate": 0.0001915360207131887, "loss": 0.8464, "step": 42700 }, { "epoch": 0.7498376024859987, "grad_norm": 0.06300363679996465, "learning_rate": 0.00019153151171549451, "loss": 0.8598, "step": 42710 }, { "epoch": 0.7500131673660001, "grad_norm": 0.06886949302323106, "learning_rate": 0.00019152700157074872, "loss": 0.8514, "step": 42720 }, { "epoch": 0.7501887322460015, "grad_norm": 0.07243209440370177, "learning_rate": 0.00019152249027900846, "loss": 0.8495, "step": 42730 }, { "epoch": 0.7503642971260029, "grad_norm": 0.06880769420176851, "learning_rate": 0.0001915179778403309, "loss": 0.8512, "step": 42740 }, { "epoch": 0.7505398620060043, "grad_norm": 0.053201257541854584, "learning_rate": 0.00019151346425477322, "loss": 0.8553, "step": 42750 }, { "epoch": 0.7507154268860057, "grad_norm": 0.06682259123974979, "learning_rate": 0.00019150894952239263, "loss": 0.8669, "step": 42760 }, { "epoch": 0.7508909917660072, "grad_norm": 0.060071405810216943, "learning_rate": 0.00019150443364324626, "loss": 0.8538, "step": 42770 }, { "epoch": 0.7510665566460085, "grad_norm": 0.06637404349142398, "learning_rate": 0.0001914999166173914, "loss": 0.8485, "step": 42780 }, { "epoch": 0.7512421215260099, "grad_norm": 0.0970489456885728, "learning_rate": 0.00019149539844488524, "loss": 0.8542, "step": 42790 }, { "epoch": 0.7514176864060114, "grad_norm": 0.13836465412392815, "learning_rate": 0.00019149087912578503, "loss": 0.8512, "step": 42800 }, { "epoch": 0.7515932512860127, "grad_norm": 0.05112321718768301, "learning_rate": 0.00019148635866014804, "loss": 0.8486, "step": 42810 }, { "epoch": 0.7517688161660141, "grad_norm": 0.07153757206206239, "learning_rate": 0.00019148183704803155, "loss": 0.8563, "step": 42820 }, { "epoch": 0.7519443810460156, "grad_norm": 0.09040642235020295, "learning_rate": 0.00019147731428949282, "loss": 0.851, "step": 42830 }, { "epoch": 0.7521199459260169, "grad_norm": 0.05708291779687337, "learning_rate": 0.00019147279038458922, "loss": 0.8483, "step": 42840 }, { "epoch": 0.7522955108060184, "grad_norm": 0.048069040288879435, "learning_rate": 0.00019146826533337802, "loss": 0.8578, "step": 42850 }, { "epoch": 0.7524710756860198, "grad_norm": 0.051950612145056245, "learning_rate": 0.00019146373913591654, "loss": 0.8493, "step": 42860 }, { "epoch": 0.7526466405660212, "grad_norm": 0.06367300066290818, "learning_rate": 0.00019145921179226217, "loss": 0.8509, "step": 42870 }, { "epoch": 0.7528222054460226, "grad_norm": 0.05603438033476271, "learning_rate": 0.00019145468330247225, "loss": 0.85, "step": 42880 }, { "epoch": 0.752997770326024, "grad_norm": 0.05455794232045381, "learning_rate": 0.00019145015366660412, "loss": 0.8475, "step": 42890 }, { "epoch": 0.7531733352060254, "grad_norm": 0.1022504540453588, "learning_rate": 0.00019144562288471526, "loss": 0.849, "step": 42900 }, { "epoch": 0.7533489000860268, "grad_norm": 0.07481500100964095, "learning_rate": 0.00019144109095686298, "loss": 0.8482, "step": 42910 }, { "epoch": 0.7535244649660282, "grad_norm": 0.05393901041883641, "learning_rate": 0.00019143655788310474, "loss": 0.8558, "step": 42920 }, { "epoch": 0.7537000298460296, "grad_norm": 0.05751725945045717, "learning_rate": 0.000191432023663498, "loss": 0.8545, "step": 42930 }, { "epoch": 0.753875594726031, "grad_norm": 0.09838444129726343, "learning_rate": 0.0001914274882981002, "loss": 0.8523, "step": 42940 }, { "epoch": 0.7540511596060324, "grad_norm": 0.050764922188395206, "learning_rate": 0.00019142295178696878, "loss": 0.8499, "step": 42950 }, { "epoch": 0.7542267244860338, "grad_norm": 0.07497254169517446, "learning_rate": 0.0001914184141301612, "loss": 0.8624, "step": 42960 }, { "epoch": 0.7544022893660353, "grad_norm": 0.0610058609466335, "learning_rate": 0.00019141387532773508, "loss": 0.8547, "step": 42970 }, { "epoch": 0.7545778542460366, "grad_norm": 0.051617165619585126, "learning_rate": 0.00019140933537974776, "loss": 0.8565, "step": 42980 }, { "epoch": 0.754753419126038, "grad_norm": 0.05675798193752236, "learning_rate": 0.00019140479428625683, "loss": 0.851, "step": 42990 }, { "epoch": 0.7549289840060395, "grad_norm": 0.08674319007110397, "learning_rate": 0.00019140025204731983, "loss": 0.8541, "step": 43000 }, { "epoch": 0.7551045488860408, "grad_norm": 0.06139725990199388, "learning_rate": 0.00019139570866299433, "loss": 0.858, "step": 43010 }, { "epoch": 0.7552801137660422, "grad_norm": 0.05900332684373865, "learning_rate": 0.00019139116413333787, "loss": 0.8606, "step": 43020 }, { "epoch": 0.7554556786460437, "grad_norm": 0.11077976642458771, "learning_rate": 0.00019138661845840806, "loss": 0.8489, "step": 43030 }, { "epoch": 0.755631243526045, "grad_norm": 0.05868577398641113, "learning_rate": 0.00019138207163826244, "loss": 0.847, "step": 43040 }, { "epoch": 0.7558068084060464, "grad_norm": 0.04742798665858617, "learning_rate": 0.00019137752367295868, "loss": 0.8529, "step": 43050 }, { "epoch": 0.7559823732860479, "grad_norm": 0.07155013703865064, "learning_rate": 0.0001913729745625543, "loss": 0.8536, "step": 43060 }, { "epoch": 0.7561579381660493, "grad_norm": 0.09152997217188928, "learning_rate": 0.0001913684243071071, "loss": 0.8514, "step": 43070 }, { "epoch": 0.7563335030460506, "grad_norm": 0.0893512509356903, "learning_rate": 0.00019136387290667463, "loss": 0.8417, "step": 43080 }, { "epoch": 0.7565090679260521, "grad_norm": 0.053045948997844296, "learning_rate": 0.00019135932036131457, "loss": 0.8479, "step": 43090 }, { "epoch": 0.7566846328060535, "grad_norm": 0.06054906529493519, "learning_rate": 0.00019135476667108458, "loss": 0.8486, "step": 43100 }, { "epoch": 0.7568601976860548, "grad_norm": 0.05856809846315758, "learning_rate": 0.00019135021183604238, "loss": 0.8551, "step": 43110 }, { "epoch": 0.7570357625660563, "grad_norm": 0.06416588555842025, "learning_rate": 0.0001913456558562457, "loss": 0.8554, "step": 43120 }, { "epoch": 0.7572113274460577, "grad_norm": 0.08976666528875044, "learning_rate": 0.00019134109873175222, "loss": 0.8447, "step": 43130 }, { "epoch": 0.757386892326059, "grad_norm": 0.06181263925021499, "learning_rate": 0.00019133654046261974, "loss": 0.8536, "step": 43140 }, { "epoch": 0.7575624572060605, "grad_norm": 0.085170097956014, "learning_rate": 0.00019133198104890595, "loss": 0.8508, "step": 43150 }, { "epoch": 0.7577380220860619, "grad_norm": 0.05963783007742476, "learning_rate": 0.00019132742049066866, "loss": 0.8571, "step": 43160 }, { "epoch": 0.7579135869660633, "grad_norm": 0.05502548248638751, "learning_rate": 0.00019132285878796564, "loss": 0.8544, "step": 43170 }, { "epoch": 0.7580891518460647, "grad_norm": 0.06258926822093153, "learning_rate": 0.0001913182959408547, "loss": 0.8527, "step": 43180 }, { "epoch": 0.7582647167260661, "grad_norm": 0.075405304302208, "learning_rate": 0.00019131373194939362, "loss": 0.8599, "step": 43190 }, { "epoch": 0.7584402816060675, "grad_norm": 0.05102254964403599, "learning_rate": 0.00019130916681364023, "loss": 0.8598, "step": 43200 }, { "epoch": 0.758615846486069, "grad_norm": 0.0633227979244252, "learning_rate": 0.00019130460053365243, "loss": 0.8578, "step": 43210 }, { "epoch": 0.7587914113660703, "grad_norm": 0.07162051105052118, "learning_rate": 0.00019130003310948804, "loss": 0.8519, "step": 43220 }, { "epoch": 0.7589669762460718, "grad_norm": 0.0751667719740759, "learning_rate": 0.00019129546454120486, "loss": 0.8459, "step": 43230 }, { "epoch": 0.7591425411260732, "grad_norm": 0.06265616918771903, "learning_rate": 0.00019129089482886093, "loss": 0.8557, "step": 43240 }, { "epoch": 0.7593181060060745, "grad_norm": 0.07799194320498737, "learning_rate": 0.00019128632397251397, "loss": 0.8549, "step": 43250 }, { "epoch": 0.759493670886076, "grad_norm": 0.0535220205843802, "learning_rate": 0.00019128175197222205, "loss": 0.8584, "step": 43260 }, { "epoch": 0.7596692357660774, "grad_norm": 0.055485579533433284, "learning_rate": 0.00019127717882804298, "loss": 0.8561, "step": 43270 }, { "epoch": 0.7598448006460787, "grad_norm": 0.05889931543284124, "learning_rate": 0.0001912726045400348, "loss": 0.8514, "step": 43280 }, { "epoch": 0.7600203655260802, "grad_norm": 0.05550677484685841, "learning_rate": 0.0001912680291082554, "loss": 0.857, "step": 43290 }, { "epoch": 0.7601959304060816, "grad_norm": 0.06464537934148877, "learning_rate": 0.00019126345253276276, "loss": 0.8535, "step": 43300 }, { "epoch": 0.7603714952860829, "grad_norm": 0.06577644805195315, "learning_rate": 0.0001912588748136149, "loss": 0.8557, "step": 43310 }, { "epoch": 0.7605470601660844, "grad_norm": 0.04599574391884156, "learning_rate": 0.00019125429595086976, "loss": 0.8589, "step": 43320 }, { "epoch": 0.7607226250460858, "grad_norm": 0.0876172595765683, "learning_rate": 0.00019124971594458543, "loss": 0.8557, "step": 43330 }, { "epoch": 0.7608981899260872, "grad_norm": 0.06091396322675385, "learning_rate": 0.00019124513479481988, "loss": 0.8521, "step": 43340 }, { "epoch": 0.7610737548060886, "grad_norm": 0.0958461469225102, "learning_rate": 0.0001912405525016312, "loss": 0.8503, "step": 43350 }, { "epoch": 0.76124931968609, "grad_norm": 0.06139837545230879, "learning_rate": 0.0001912359690650774, "loss": 0.8577, "step": 43360 }, { "epoch": 0.7614248845660914, "grad_norm": 0.04180305019279672, "learning_rate": 0.0001912313844852166, "loss": 0.8575, "step": 43370 }, { "epoch": 0.7616004494460928, "grad_norm": 0.06424519899493925, "learning_rate": 0.00019122679876210685, "loss": 0.8598, "step": 43380 }, { "epoch": 0.7617760143260942, "grad_norm": 0.0555589852013795, "learning_rate": 0.00019122221189580627, "loss": 0.8522, "step": 43390 }, { "epoch": 0.7619515792060956, "grad_norm": 0.0641139232562304, "learning_rate": 0.000191217623886373, "loss": 0.8611, "step": 43400 }, { "epoch": 0.762127144086097, "grad_norm": 0.08552525915987269, "learning_rate": 0.00019121303473386512, "loss": 0.8505, "step": 43410 }, { "epoch": 0.7623027089660984, "grad_norm": 0.06770031122780662, "learning_rate": 0.0001912084444383408, "loss": 0.8593, "step": 43420 }, { "epoch": 0.7624782738460998, "grad_norm": 0.06834292333081758, "learning_rate": 0.0001912038529998582, "loss": 0.8515, "step": 43430 }, { "epoch": 0.7626538387261013, "grad_norm": 0.08720453934868087, "learning_rate": 0.00019119926041847552, "loss": 0.854, "step": 43440 }, { "epoch": 0.7628294036061026, "grad_norm": 0.06323105895939206, "learning_rate": 0.00019119466669425088, "loss": 0.8565, "step": 43450 }, { "epoch": 0.763004968486104, "grad_norm": 0.0545830584378564, "learning_rate": 0.00019119007182724255, "loss": 0.8535, "step": 43460 }, { "epoch": 0.7631805333661055, "grad_norm": 0.06923936294047123, "learning_rate": 0.00019118547581750873, "loss": 0.8561, "step": 43470 }, { "epoch": 0.7633560982461068, "grad_norm": 0.05234530076885749, "learning_rate": 0.00019118087866510763, "loss": 0.853, "step": 43480 }, { "epoch": 0.7635316631261082, "grad_norm": 0.07569626401341849, "learning_rate": 0.00019117628037009754, "loss": 0.8565, "step": 43490 }, { "epoch": 0.7637072280061097, "grad_norm": 0.06133328432504088, "learning_rate": 0.0001911716809325367, "loss": 0.8523, "step": 43500 }, { "epoch": 0.763882792886111, "grad_norm": 0.07274960535026694, "learning_rate": 0.00019116708035248333, "loss": 0.8567, "step": 43510 }, { "epoch": 0.7640583577661124, "grad_norm": 0.0509351578216849, "learning_rate": 0.00019116247862999584, "loss": 0.843, "step": 43520 }, { "epoch": 0.7642339226461139, "grad_norm": 0.05042322822634551, "learning_rate": 0.0001911578757651324, "loss": 0.8456, "step": 43530 }, { "epoch": 0.7644094875261153, "grad_norm": 0.05356871696092514, "learning_rate": 0.00019115327175795143, "loss": 0.8526, "step": 43540 }, { "epoch": 0.7645850524061166, "grad_norm": 0.07239790615644189, "learning_rate": 0.00019114866660851124, "loss": 0.8429, "step": 43550 }, { "epoch": 0.7647606172861181, "grad_norm": 0.07453555750697899, "learning_rate": 0.00019114406031687015, "loss": 0.852, "step": 43560 }, { "epoch": 0.7649361821661195, "grad_norm": 0.08721007310983454, "learning_rate": 0.00019113945288308653, "loss": 0.8492, "step": 43570 }, { "epoch": 0.765111747046121, "grad_norm": 0.05042548215832969, "learning_rate": 0.0001911348443072188, "loss": 0.8569, "step": 43580 }, { "epoch": 0.7652873119261223, "grad_norm": 0.05200012309722896, "learning_rate": 0.00019113023458932528, "loss": 0.8516, "step": 43590 }, { "epoch": 0.7654628768061237, "grad_norm": 0.05703222553952181, "learning_rate": 0.00019112562372946445, "loss": 0.8557, "step": 43600 }, { "epoch": 0.7656384416861252, "grad_norm": 0.06320190677517377, "learning_rate": 0.00019112101172769468, "loss": 0.8542, "step": 43610 }, { "epoch": 0.7658140065661265, "grad_norm": 0.059391985254567736, "learning_rate": 0.00019111639858407443, "loss": 0.8545, "step": 43620 }, { "epoch": 0.7659895714461279, "grad_norm": 0.07383790747672873, "learning_rate": 0.0001911117842986621, "loss": 0.8447, "step": 43630 }, { "epoch": 0.7661651363261294, "grad_norm": 0.058010825468149334, "learning_rate": 0.0001911071688715162, "loss": 0.8464, "step": 43640 }, { "epoch": 0.7663407012061307, "grad_norm": 0.06686043542023572, "learning_rate": 0.00019110255230269524, "loss": 0.8524, "step": 43650 }, { "epoch": 0.7665162660861321, "grad_norm": 0.07255565478487642, "learning_rate": 0.00019109793459225763, "loss": 0.8468, "step": 43660 }, { "epoch": 0.7666918309661336, "grad_norm": 0.06190737457435144, "learning_rate": 0.00019109331574026196, "loss": 0.8592, "step": 43670 }, { "epoch": 0.766867395846135, "grad_norm": 0.055201367509751395, "learning_rate": 0.00019108869574676671, "loss": 0.8465, "step": 43680 }, { "epoch": 0.7670429607261363, "grad_norm": 0.06813231032629243, "learning_rate": 0.0001910840746118304, "loss": 0.8474, "step": 43690 }, { "epoch": 0.7672185256061378, "grad_norm": 0.060602642276489045, "learning_rate": 0.00019107945233551164, "loss": 0.8546, "step": 43700 }, { "epoch": 0.7673940904861392, "grad_norm": 0.07468021258741334, "learning_rate": 0.00019107482891786892, "loss": 0.8459, "step": 43710 }, { "epoch": 0.7675696553661405, "grad_norm": 0.08507546848009234, "learning_rate": 0.00019107020435896082, "loss": 0.8485, "step": 43720 }, { "epoch": 0.767745220246142, "grad_norm": 0.06518135960371774, "learning_rate": 0.00019106557865884602, "loss": 0.8531, "step": 43730 }, { "epoch": 0.7679207851261434, "grad_norm": 0.06309733927149293, "learning_rate": 0.0001910609518175831, "loss": 0.8532, "step": 43740 }, { "epoch": 0.7680963500061447, "grad_norm": 0.06900257575400766, "learning_rate": 0.0001910563238352306, "loss": 0.8515, "step": 43750 }, { "epoch": 0.7682719148861462, "grad_norm": 0.052757516885484565, "learning_rate": 0.00019105169471184726, "loss": 0.8454, "step": 43760 }, { "epoch": 0.7684474797661476, "grad_norm": 0.06038105679676378, "learning_rate": 0.00019104706444749168, "loss": 0.8493, "step": 43770 }, { "epoch": 0.768623044646149, "grad_norm": 0.058891281806450056, "learning_rate": 0.00019104243304222251, "loss": 0.8584, "step": 43780 }, { "epoch": 0.7687986095261504, "grad_norm": 0.05486035308692534, "learning_rate": 0.00019103780049609847, "loss": 0.8541, "step": 43790 }, { "epoch": 0.7689741744061518, "grad_norm": 0.06314697880451292, "learning_rate": 0.00019103316680917824, "loss": 0.8507, "step": 43800 }, { "epoch": 0.7691497392861532, "grad_norm": 0.06004674140562878, "learning_rate": 0.00019102853198152056, "loss": 0.8576, "step": 43810 }, { "epoch": 0.7693253041661546, "grad_norm": 0.06310113365605874, "learning_rate": 0.0001910238960131841, "loss": 0.8518, "step": 43820 }, { "epoch": 0.769500869046156, "grad_norm": 0.05784739438217662, "learning_rate": 0.00019101925890422766, "loss": 0.8579, "step": 43830 }, { "epoch": 0.7696764339261574, "grad_norm": 0.06885898332132023, "learning_rate": 0.0001910146206547099, "loss": 0.8467, "step": 43840 }, { "epoch": 0.7698519988061588, "grad_norm": 0.08235296008850172, "learning_rate": 0.00019100998126468966, "loss": 0.8532, "step": 43850 }, { "epoch": 0.7700275636861602, "grad_norm": 0.05562841696630697, "learning_rate": 0.00019100534073422572, "loss": 0.8566, "step": 43860 }, { "epoch": 0.7702031285661616, "grad_norm": 0.0634956229979075, "learning_rate": 0.00019100069906337688, "loss": 0.8471, "step": 43870 }, { "epoch": 0.770378693446163, "grad_norm": 0.0692280618064705, "learning_rate": 0.00019099605625220195, "loss": 0.8531, "step": 43880 }, { "epoch": 0.7705542583261644, "grad_norm": 0.09234540335188632, "learning_rate": 0.0001909914123007597, "loss": 0.8587, "step": 43890 }, { "epoch": 0.7707298232061658, "grad_norm": 0.06055945697706788, "learning_rate": 0.000190986767209109, "loss": 0.8554, "step": 43900 }, { "epoch": 0.7709053880861673, "grad_norm": 0.08376353456706266, "learning_rate": 0.00019098212097730873, "loss": 0.8527, "step": 43910 }, { "epoch": 0.7710809529661686, "grad_norm": 0.07920234826178658, "learning_rate": 0.00019097747360541777, "loss": 0.8522, "step": 43920 }, { "epoch": 0.77125651784617, "grad_norm": 0.06156897503603408, "learning_rate": 0.0001909728250934949, "loss": 0.8529, "step": 43930 }, { "epoch": 0.7714320827261715, "grad_norm": 0.12418466254872228, "learning_rate": 0.00019096817544159917, "loss": 0.8498, "step": 43940 }, { "epoch": 0.7716076476061728, "grad_norm": 0.078904928168104, "learning_rate": 0.0001909635246497894, "loss": 0.8502, "step": 43950 }, { "epoch": 0.7717832124861743, "grad_norm": 0.08014012510684952, "learning_rate": 0.0001909588727181245, "loss": 0.8542, "step": 43960 }, { "epoch": 0.7719587773661757, "grad_norm": 0.07506632874216354, "learning_rate": 0.00019095421964666345, "loss": 0.8487, "step": 43970 }, { "epoch": 0.772134342246177, "grad_norm": 0.059462781597757565, "learning_rate": 0.0001909495654354652, "loss": 0.8515, "step": 43980 }, { "epoch": 0.7723099071261785, "grad_norm": 0.0631024043440751, "learning_rate": 0.0001909449100845887, "loss": 0.8487, "step": 43990 }, { "epoch": 0.7724854720061799, "grad_norm": 0.08129650861027936, "learning_rate": 0.00019094025359409303, "loss": 0.8604, "step": 44000 }, { "epoch": 0.7726610368861813, "grad_norm": 0.09414710673669144, "learning_rate": 0.00019093559596403703, "loss": 0.852, "step": 44010 }, { "epoch": 0.7728366017661827, "grad_norm": 0.08823169545683522, "learning_rate": 0.00019093093719447984, "loss": 0.854, "step": 44020 }, { "epoch": 0.7730121666461841, "grad_norm": 0.05759452026997099, "learning_rate": 0.0001909262772854804, "loss": 0.8476, "step": 44030 }, { "epoch": 0.7731877315261855, "grad_norm": 0.045540678865752415, "learning_rate": 0.00019092161623709782, "loss": 0.8601, "step": 44040 }, { "epoch": 0.773363296406187, "grad_norm": 0.07700498430403721, "learning_rate": 0.00019091695404939112, "loss": 0.8533, "step": 44050 }, { "epoch": 0.7735388612861883, "grad_norm": 0.07056329290668334, "learning_rate": 0.0001909122907224194, "loss": 0.8524, "step": 44060 }, { "epoch": 0.7737144261661897, "grad_norm": 0.05806047574749843, "learning_rate": 0.0001909076262562417, "loss": 0.8523, "step": 44070 }, { "epoch": 0.7738899910461912, "grad_norm": 0.0746612113288889, "learning_rate": 0.00019090296065091714, "loss": 0.8533, "step": 44080 }, { "epoch": 0.7740655559261925, "grad_norm": 0.05002577678142353, "learning_rate": 0.00019089829390650482, "loss": 0.8485, "step": 44090 }, { "epoch": 0.7742411208061939, "grad_norm": 0.07242245639658373, "learning_rate": 0.00019089362602306393, "loss": 0.8516, "step": 44100 }, { "epoch": 0.7744166856861954, "grad_norm": 0.08380074614653389, "learning_rate": 0.00019088895700065356, "loss": 0.8495, "step": 44110 }, { "epoch": 0.7745922505661967, "grad_norm": 0.06970305940441722, "learning_rate": 0.00019088428683933288, "loss": 0.8531, "step": 44120 }, { "epoch": 0.7747678154461981, "grad_norm": 0.055188050487441756, "learning_rate": 0.00019087961553916104, "loss": 0.8541, "step": 44130 }, { "epoch": 0.7749433803261996, "grad_norm": 0.06563713931196974, "learning_rate": 0.00019087494310019728, "loss": 0.8551, "step": 44140 }, { "epoch": 0.775118945206201, "grad_norm": 0.07128414529342648, "learning_rate": 0.00019087026952250072, "loss": 0.8471, "step": 44150 }, { "epoch": 0.7752945100862023, "grad_norm": 0.08953288783930324, "learning_rate": 0.00019086559480613067, "loss": 0.8519, "step": 44160 }, { "epoch": 0.7754700749662038, "grad_norm": 0.0655321909468844, "learning_rate": 0.0001908609189511463, "loss": 0.8573, "step": 44170 }, { "epoch": 0.7756456398462052, "grad_norm": 0.05446819197474134, "learning_rate": 0.00019085624195760683, "loss": 0.8462, "step": 44180 }, { "epoch": 0.7758212047262065, "grad_norm": 0.06705855184861718, "learning_rate": 0.0001908515638255716, "loss": 0.8502, "step": 44190 }, { "epoch": 0.775996769606208, "grad_norm": 0.06142147638291985, "learning_rate": 0.0001908468845550998, "loss": 0.8528, "step": 44200 }, { "epoch": 0.7761723344862094, "grad_norm": 0.055073382392248134, "learning_rate": 0.00019084220414625075, "loss": 0.8594, "step": 44210 }, { "epoch": 0.7763478993662107, "grad_norm": 0.054326307380996176, "learning_rate": 0.00019083752259908376, "loss": 0.8481, "step": 44220 }, { "epoch": 0.7765234642462122, "grad_norm": 0.04876220891614223, "learning_rate": 0.00019083283991365817, "loss": 0.8491, "step": 44230 }, { "epoch": 0.7766990291262136, "grad_norm": 0.06388136463932291, "learning_rate": 0.00019082815609003327, "loss": 0.8576, "step": 44240 }, { "epoch": 0.776874594006215, "grad_norm": 0.08918023260377533, "learning_rate": 0.0001908234711282684, "loss": 0.8521, "step": 44250 }, { "epoch": 0.7770501588862164, "grad_norm": 0.060720894170926756, "learning_rate": 0.00019081878502842295, "loss": 0.8597, "step": 44260 }, { "epoch": 0.7772257237662178, "grad_norm": 0.05536199159828857, "learning_rate": 0.0001908140977905563, "loss": 0.8598, "step": 44270 }, { "epoch": 0.7774012886462192, "grad_norm": 0.06761192197851912, "learning_rate": 0.00019080940941472777, "loss": 0.8542, "step": 44280 }, { "epoch": 0.7775768535262206, "grad_norm": 0.05491260735687094, "learning_rate": 0.00019080471990099685, "loss": 0.8598, "step": 44290 }, { "epoch": 0.777752418406222, "grad_norm": 0.058908477409939784, "learning_rate": 0.00019080002924942287, "loss": 0.8514, "step": 44300 }, { "epoch": 0.7779279832862235, "grad_norm": 0.06055027996783998, "learning_rate": 0.00019079533746006538, "loss": 0.8537, "step": 44310 }, { "epoch": 0.7781035481662248, "grad_norm": 0.060964463957886836, "learning_rate": 0.00019079064453298371, "loss": 0.8552, "step": 44320 }, { "epoch": 0.7782791130462262, "grad_norm": 0.05964295129499069, "learning_rate": 0.00019078595046823738, "loss": 0.8484, "step": 44330 }, { "epoch": 0.7784546779262277, "grad_norm": 0.08503996924511849, "learning_rate": 0.00019078125526588585, "loss": 0.8526, "step": 44340 }, { "epoch": 0.7786302428062291, "grad_norm": 0.07124893767224534, "learning_rate": 0.0001907765589259886, "loss": 0.8684, "step": 44350 }, { "epoch": 0.7788058076862304, "grad_norm": 0.06456637241861683, "learning_rate": 0.00019077186144860514, "loss": 0.8494, "step": 44360 }, { "epoch": 0.7789813725662319, "grad_norm": 0.05518608520345722, "learning_rate": 0.000190767162833795, "loss": 0.8549, "step": 44370 }, { "epoch": 0.7791569374462333, "grad_norm": 0.07328874866011087, "learning_rate": 0.0001907624630816177, "loss": 0.8551, "step": 44380 }, { "epoch": 0.7793325023262346, "grad_norm": 0.07853929037049312, "learning_rate": 0.0001907577621921328, "loss": 0.8534, "step": 44390 }, { "epoch": 0.7795080672062361, "grad_norm": 0.07230949648987116, "learning_rate": 0.00019075306016539984, "loss": 0.8546, "step": 44400 }, { "epoch": 0.7796836320862375, "grad_norm": 0.07984786011580942, "learning_rate": 0.00019074835700147843, "loss": 0.8556, "step": 44410 }, { "epoch": 0.7798591969662388, "grad_norm": 0.05159836833100242, "learning_rate": 0.00019074365270042807, "loss": 0.8604, "step": 44420 }, { "epoch": 0.7800347618462403, "grad_norm": 0.0595292797613431, "learning_rate": 0.0001907389472623085, "loss": 0.8593, "step": 44430 }, { "epoch": 0.7802103267262417, "grad_norm": 0.0817672021982629, "learning_rate": 0.00019073424068717925, "loss": 0.8515, "step": 44440 }, { "epoch": 0.780385891606243, "grad_norm": 0.061914671942300874, "learning_rate": 0.00019072953297509998, "loss": 0.8416, "step": 44450 }, { "epoch": 0.7805614564862445, "grad_norm": 0.07186404087381086, "learning_rate": 0.00019072482412613032, "loss": 0.8582, "step": 44460 }, { "epoch": 0.7807370213662459, "grad_norm": 0.06711762271432485, "learning_rate": 0.00019072011414032996, "loss": 0.8527, "step": 44470 }, { "epoch": 0.7809125862462473, "grad_norm": 0.0637638883152513, "learning_rate": 0.0001907154030177585, "loss": 0.8536, "step": 44480 }, { "epoch": 0.7810881511262487, "grad_norm": 0.11699546982772245, "learning_rate": 0.00019071069075847575, "loss": 0.8435, "step": 44490 }, { "epoch": 0.7812637160062501, "grad_norm": 0.080135721187763, "learning_rate": 0.00019070597736254135, "loss": 0.8468, "step": 44500 }, { "epoch": 0.7814392808862515, "grad_norm": 0.05959723430712761, "learning_rate": 0.00019070126283001498, "loss": 0.8521, "step": 44510 }, { "epoch": 0.781614845766253, "grad_norm": 0.09812216239104536, "learning_rate": 0.00019069654716095645, "loss": 0.8578, "step": 44520 }, { "epoch": 0.7817904106462543, "grad_norm": 0.10250255722309344, "learning_rate": 0.00019069183035542547, "loss": 0.8448, "step": 44530 }, { "epoch": 0.7819659755262557, "grad_norm": 0.05778334435135411, "learning_rate": 0.00019068711241348186, "loss": 0.8551, "step": 44540 }, { "epoch": 0.7821415404062572, "grad_norm": 0.052092199279936566, "learning_rate": 0.0001906823933351853, "loss": 0.8512, "step": 44550 }, { "epoch": 0.7823171052862585, "grad_norm": 0.052814972343927485, "learning_rate": 0.0001906776731205956, "loss": 0.8491, "step": 44560 }, { "epoch": 0.7824926701662599, "grad_norm": 0.059063193637705715, "learning_rate": 0.00019067295176977263, "loss": 0.8493, "step": 44570 }, { "epoch": 0.7826682350462614, "grad_norm": 0.0475682620187915, "learning_rate": 0.00019066822928277618, "loss": 0.8487, "step": 44580 }, { "epoch": 0.7828437999262627, "grad_norm": 0.04747962345145666, "learning_rate": 0.0001906635056596661, "loss": 0.8501, "step": 44590 }, { "epoch": 0.7830193648062641, "grad_norm": 0.0734651274214562, "learning_rate": 0.00019065878090050218, "loss": 0.8564, "step": 44600 }, { "epoch": 0.7831949296862656, "grad_norm": 0.07305158555643522, "learning_rate": 0.00019065405500534434, "loss": 0.8534, "step": 44610 }, { "epoch": 0.783370494566267, "grad_norm": 0.06325821037968286, "learning_rate": 0.00019064932797425242, "loss": 0.8499, "step": 44620 }, { "epoch": 0.7835460594462683, "grad_norm": 0.06737561997340104, "learning_rate": 0.00019064459980728634, "loss": 0.8563, "step": 44630 }, { "epoch": 0.7837216243262698, "grad_norm": 0.07220315677135555, "learning_rate": 0.00019063987050450603, "loss": 0.8529, "step": 44640 }, { "epoch": 0.7838971892062712, "grad_norm": 0.09375034289144679, "learning_rate": 0.00019063514006597136, "loss": 0.8556, "step": 44650 }, { "epoch": 0.7840727540862725, "grad_norm": 0.09103694773730486, "learning_rate": 0.00019063040849174229, "loss": 0.8526, "step": 44660 }, { "epoch": 0.784248318966274, "grad_norm": 0.05598711888784064, "learning_rate": 0.00019062567578187876, "loss": 0.8501, "step": 44670 }, { "epoch": 0.7844238838462754, "grad_norm": 0.06444181986830774, "learning_rate": 0.00019062094193644075, "loss": 0.8575, "step": 44680 }, { "epoch": 0.7845994487262768, "grad_norm": 0.07476056212521108, "learning_rate": 0.00019061620695548823, "loss": 0.8488, "step": 44690 }, { "epoch": 0.7847750136062782, "grad_norm": 0.0600374838034639, "learning_rate": 0.00019061147083908117, "loss": 0.8552, "step": 44700 }, { "epoch": 0.7849505784862796, "grad_norm": 0.07389107271981177, "learning_rate": 0.00019060673358727964, "loss": 0.8485, "step": 44710 }, { "epoch": 0.7851261433662811, "grad_norm": 0.05844123743816953, "learning_rate": 0.00019060199520014358, "loss": 0.8583, "step": 44720 }, { "epoch": 0.7853017082462824, "grad_norm": 0.07247169316153972, "learning_rate": 0.0001905972556777331, "loss": 0.8544, "step": 44730 }, { "epoch": 0.7854772731262838, "grad_norm": 0.11620982744580263, "learning_rate": 0.00019059251502010822, "loss": 0.8521, "step": 44740 }, { "epoch": 0.7856528380062853, "grad_norm": 0.07321410228053757, "learning_rate": 0.000190587773227329, "loss": 0.8526, "step": 44750 }, { "epoch": 0.7858284028862866, "grad_norm": 0.05665206385582303, "learning_rate": 0.00019058303029945552, "loss": 0.8504, "step": 44760 }, { "epoch": 0.786003967766288, "grad_norm": 0.04772442397535011, "learning_rate": 0.00019057828623654787, "loss": 0.8507, "step": 44770 }, { "epoch": 0.7861795326462895, "grad_norm": 0.041550287725110845, "learning_rate": 0.00019057354103866619, "loss": 0.8533, "step": 44780 }, { "epoch": 0.7863550975262908, "grad_norm": 0.09255241627069272, "learning_rate": 0.00019056879470587054, "loss": 0.8522, "step": 44790 }, { "epoch": 0.7865306624062922, "grad_norm": 0.05616334423840382, "learning_rate": 0.00019056404723822112, "loss": 0.852, "step": 44800 }, { "epoch": 0.7867062272862937, "grad_norm": 0.0624341867026592, "learning_rate": 0.00019055929863577806, "loss": 0.8483, "step": 44810 }, { "epoch": 0.7868817921662951, "grad_norm": 0.07407400294540183, "learning_rate": 0.00019055454889860152, "loss": 0.8564, "step": 44820 }, { "epoch": 0.7870573570462964, "grad_norm": 0.057714722404672215, "learning_rate": 0.00019054979802675165, "loss": 0.8503, "step": 44830 }, { "epoch": 0.7872329219262979, "grad_norm": 0.061720097165039976, "learning_rate": 0.0001905450460202887, "loss": 0.8556, "step": 44840 }, { "epoch": 0.7874084868062993, "grad_norm": 0.0706636622638105, "learning_rate": 0.0001905402928792729, "loss": 0.8545, "step": 44850 }, { "epoch": 0.7875840516863006, "grad_norm": 0.06084927031288848, "learning_rate": 0.00019053553860376438, "loss": 0.8503, "step": 44860 }, { "epoch": 0.7877596165663021, "grad_norm": 0.07015971200747814, "learning_rate": 0.00019053078319382341, "loss": 0.8449, "step": 44870 }, { "epoch": 0.7879351814463035, "grad_norm": 0.060582275749050486, "learning_rate": 0.0001905260266495103, "loss": 0.848, "step": 44880 }, { "epoch": 0.7881107463263048, "grad_norm": 0.08838952935912316, "learning_rate": 0.00019052126897088525, "loss": 0.853, "step": 44890 }, { "epoch": 0.7882863112063063, "grad_norm": 0.05070568625717274, "learning_rate": 0.0001905165101580086, "loss": 0.8551, "step": 44900 }, { "epoch": 0.7884618760863077, "grad_norm": 0.09791567411732402, "learning_rate": 0.00019051175021094058, "loss": 0.8611, "step": 44910 }, { "epoch": 0.7886374409663091, "grad_norm": 0.053820492202107176, "learning_rate": 0.00019050698912974156, "loss": 0.8479, "step": 44920 }, { "epoch": 0.7888130058463105, "grad_norm": 0.06491341562977362, "learning_rate": 0.00019050222691447178, "loss": 0.855, "step": 44930 }, { "epoch": 0.7889885707263119, "grad_norm": 0.06249694383187704, "learning_rate": 0.00019049746356519166, "loss": 0.8526, "step": 44940 }, { "epoch": 0.7891641356063133, "grad_norm": 0.07137985874023098, "learning_rate": 0.00019049269908196151, "loss": 0.8652, "step": 44950 }, { "epoch": 0.7893397004863147, "grad_norm": 0.10578055449367747, "learning_rate": 0.00019048793346484173, "loss": 0.8548, "step": 44960 }, { "epoch": 0.7895152653663161, "grad_norm": 0.07460445604909198, "learning_rate": 0.0001904831667138927, "loss": 0.852, "step": 44970 }, { "epoch": 0.7896908302463175, "grad_norm": 0.06207883253497817, "learning_rate": 0.00019047839882917477, "loss": 0.8489, "step": 44980 }, { "epoch": 0.789866395126319, "grad_norm": 0.09828170667825473, "learning_rate": 0.0001904736298107484, "loss": 0.8572, "step": 44990 }, { "epoch": 0.7900419600063203, "grad_norm": 0.05375212629075038, "learning_rate": 0.00019046885965867392, "loss": 0.8493, "step": 45000 }, { "epoch": 0.7902175248863217, "grad_norm": 0.06596709392702883, "learning_rate": 0.00019046408837301192, "loss": 0.856, "step": 45010 }, { "epoch": 0.7903930897663232, "grad_norm": 0.06212117036884291, "learning_rate": 0.00019045931595382276, "loss": 0.8509, "step": 45020 }, { "epoch": 0.7905686546463245, "grad_norm": 0.05318496501497332, "learning_rate": 0.00019045454240116693, "loss": 0.8561, "step": 45030 }, { "epoch": 0.790744219526326, "grad_norm": 0.06210173954896695, "learning_rate": 0.00019044976771510487, "loss": 0.8617, "step": 45040 }, { "epoch": 0.7909197844063274, "grad_norm": 0.054732684085863854, "learning_rate": 0.00019044499189569712, "loss": 0.8524, "step": 45050 }, { "epoch": 0.7910953492863287, "grad_norm": 0.058249813563060124, "learning_rate": 0.0001904402149430042, "loss": 0.8482, "step": 45060 }, { "epoch": 0.7912709141663302, "grad_norm": 0.0674440750207019, "learning_rate": 0.0001904354368570866, "loss": 0.8565, "step": 45070 }, { "epoch": 0.7914464790463316, "grad_norm": 0.05419804825371086, "learning_rate": 0.0001904306576380049, "loss": 0.8504, "step": 45080 }, { "epoch": 0.791622043926333, "grad_norm": 0.05087269315394542, "learning_rate": 0.0001904258772858196, "loss": 0.8525, "step": 45090 }, { "epoch": 0.7917976088063344, "grad_norm": 0.056906375804341436, "learning_rate": 0.0001904210958005913, "loss": 0.8543, "step": 45100 }, { "epoch": 0.7919731736863358, "grad_norm": 0.08422389516273134, "learning_rate": 0.0001904163131823806, "loss": 0.8471, "step": 45110 }, { "epoch": 0.7921487385663372, "grad_norm": 0.07700253492641647, "learning_rate": 0.00019041152943124806, "loss": 0.8547, "step": 45120 }, { "epoch": 0.7923243034463386, "grad_norm": 0.06427242874197718, "learning_rate": 0.0001904067445472543, "loss": 0.8481, "step": 45130 }, { "epoch": 0.79249986832634, "grad_norm": 0.08882837090291437, "learning_rate": 0.00019040195853045996, "loss": 0.8534, "step": 45140 }, { "epoch": 0.7926754332063414, "grad_norm": 0.05604645902201223, "learning_rate": 0.00019039717138092567, "loss": 0.8519, "step": 45150 }, { "epoch": 0.7928509980863429, "grad_norm": 0.07639771741746557, "learning_rate": 0.00019039238309871207, "loss": 0.8457, "step": 45160 }, { "epoch": 0.7930265629663442, "grad_norm": 0.08222794188389493, "learning_rate": 0.0001903875936838799, "loss": 0.8532, "step": 45170 }, { "epoch": 0.7932021278463456, "grad_norm": 0.05643927431072956, "learning_rate": 0.00019038280313648972, "loss": 0.8419, "step": 45180 }, { "epoch": 0.7933776927263471, "grad_norm": 0.06234762497080811, "learning_rate": 0.00019037801145660232, "loss": 0.8508, "step": 45190 }, { "epoch": 0.7935532576063484, "grad_norm": 0.06507530068841298, "learning_rate": 0.00019037321864427838, "loss": 0.8543, "step": 45200 }, { "epoch": 0.7937288224863498, "grad_norm": 0.06852527209216674, "learning_rate": 0.00019036842469957865, "loss": 0.8521, "step": 45210 }, { "epoch": 0.7939043873663513, "grad_norm": 0.09371737526516519, "learning_rate": 0.00019036362962256384, "loss": 0.8473, "step": 45220 }, { "epoch": 0.7940799522463526, "grad_norm": 0.06605842960590787, "learning_rate": 0.0001903588334132947, "loss": 0.8529, "step": 45230 }, { "epoch": 0.794255517126354, "grad_norm": 0.054179401607217834, "learning_rate": 0.00019035403607183205, "loss": 0.8527, "step": 45240 }, { "epoch": 0.7944310820063555, "grad_norm": 0.05315809359419304, "learning_rate": 0.0001903492375982366, "loss": 0.8514, "step": 45250 }, { "epoch": 0.7946066468863568, "grad_norm": 0.07844966882842982, "learning_rate": 0.00019034443799256922, "loss": 0.8512, "step": 45260 }, { "epoch": 0.7947822117663582, "grad_norm": 0.06736568743122677, "learning_rate": 0.00019033963725489068, "loss": 0.8518, "step": 45270 }, { "epoch": 0.7949577766463597, "grad_norm": 0.06033688617736483, "learning_rate": 0.0001903348353852618, "loss": 0.8651, "step": 45280 }, { "epoch": 0.7951333415263611, "grad_norm": 0.06366639060250276, "learning_rate": 0.00019033003238374345, "loss": 0.8549, "step": 45290 }, { "epoch": 0.7953089064063624, "grad_norm": 0.041650680603661304, "learning_rate": 0.00019032522825039648, "loss": 0.8524, "step": 45300 }, { "epoch": 0.7954844712863639, "grad_norm": 0.05917001545708143, "learning_rate": 0.0001903204229852817, "loss": 0.8537, "step": 45310 }, { "epoch": 0.7956600361663653, "grad_norm": 0.05103812962728192, "learning_rate": 0.0001903156165884601, "loss": 0.8583, "step": 45320 }, { "epoch": 0.7958356010463666, "grad_norm": 0.06186221253990774, "learning_rate": 0.0001903108090599925, "loss": 0.853, "step": 45330 }, { "epoch": 0.7960111659263681, "grad_norm": 0.06817425122842442, "learning_rate": 0.0001903060003999398, "loss": 0.8572, "step": 45340 }, { "epoch": 0.7961867308063695, "grad_norm": 0.054659338938784274, "learning_rate": 0.00019030119060836298, "loss": 0.8525, "step": 45350 }, { "epoch": 0.7963622956863708, "grad_norm": 0.05144454493612122, "learning_rate": 0.00019029637968532296, "loss": 0.8506, "step": 45360 }, { "epoch": 0.7965378605663723, "grad_norm": 0.069271371433093, "learning_rate": 0.0001902915676308807, "loss": 0.8577, "step": 45370 }, { "epoch": 0.7967134254463737, "grad_norm": 0.06832503731925324, "learning_rate": 0.00019028675444509718, "loss": 0.8497, "step": 45380 }, { "epoch": 0.7968889903263751, "grad_norm": 0.059334702988105836, "learning_rate": 0.00019028194012803338, "loss": 0.8487, "step": 45390 }, { "epoch": 0.7970645552063765, "grad_norm": 0.060935876237263126, "learning_rate": 0.00019027712467975028, "loss": 0.8552, "step": 45400 }, { "epoch": 0.7972401200863779, "grad_norm": 0.05617883710968035, "learning_rate": 0.00019027230810030884, "loss": 0.8544, "step": 45410 }, { "epoch": 0.7974156849663794, "grad_norm": 0.06569343048613024, "learning_rate": 0.0001902674903897702, "loss": 0.8496, "step": 45420 }, { "epoch": 0.7975912498463807, "grad_norm": 0.07334980849485803, "learning_rate": 0.00019026267154819535, "loss": 0.8555, "step": 45430 }, { "epoch": 0.7977668147263821, "grad_norm": 0.07515709474897475, "learning_rate": 0.00019025785157564536, "loss": 0.8575, "step": 45440 }, { "epoch": 0.7979423796063836, "grad_norm": 0.05625817737110515, "learning_rate": 0.00019025303047218124, "loss": 0.8521, "step": 45450 }, { "epoch": 0.798117944486385, "grad_norm": 0.056018961146904155, "learning_rate": 0.00019024820823786414, "loss": 0.8473, "step": 45460 }, { "epoch": 0.7982935093663863, "grad_norm": 0.07014023364251756, "learning_rate": 0.00019024338487275513, "loss": 0.8531, "step": 45470 }, { "epoch": 0.7984690742463878, "grad_norm": 0.0570567039416256, "learning_rate": 0.00019023856037691535, "loss": 0.8401, "step": 45480 }, { "epoch": 0.7986446391263892, "grad_norm": 0.07096554283058747, "learning_rate": 0.00019023373475040588, "loss": 0.8509, "step": 45490 }, { "epoch": 0.7988202040063905, "grad_norm": 0.05431789560114488, "learning_rate": 0.0001902289079932879, "loss": 0.8582, "step": 45500 }, { "epoch": 0.798995768886392, "grad_norm": 0.05689089539075336, "learning_rate": 0.0001902240801056226, "loss": 0.8533, "step": 45510 }, { "epoch": 0.7991713337663934, "grad_norm": 0.06861304325485264, "learning_rate": 0.00019021925108747106, "loss": 0.8502, "step": 45520 }, { "epoch": 0.7993468986463947, "grad_norm": 0.07353205105233206, "learning_rate": 0.00019021442093889446, "loss": 0.8604, "step": 45530 }, { "epoch": 0.7995224635263962, "grad_norm": 0.14135287015450984, "learning_rate": 0.00019020958965995413, "loss": 0.854, "step": 45540 }, { "epoch": 0.7996980284063976, "grad_norm": 0.06564832681142667, "learning_rate": 0.00019020475725071116, "loss": 0.8551, "step": 45550 }, { "epoch": 0.799873593286399, "grad_norm": 0.050451671238620155, "learning_rate": 0.00019019992371122684, "loss": 0.8571, "step": 45560 }, { "epoch": 0.8000491581664004, "grad_norm": 0.07270916534337413, "learning_rate": 0.0001901950890415624, "loss": 0.861, "step": 45570 }, { "epoch": 0.8002247230464018, "grad_norm": 0.0718088232340436, "learning_rate": 0.00019019025324177905, "loss": 0.8598, "step": 45580 }, { "epoch": 0.8004002879264032, "grad_norm": 0.07205916574707691, "learning_rate": 0.00019018541631193815, "loss": 0.8502, "step": 45590 }, { "epoch": 0.8005758528064046, "grad_norm": 0.07515963801180701, "learning_rate": 0.0001901805782521009, "loss": 0.857, "step": 45600 }, { "epoch": 0.800751417686406, "grad_norm": 0.06248362538628925, "learning_rate": 0.00019017573906232864, "loss": 0.8642, "step": 45610 }, { "epoch": 0.8009269825664074, "grad_norm": 0.06292600064644602, "learning_rate": 0.00019017089874268266, "loss": 0.8578, "step": 45620 }, { "epoch": 0.8011025474464089, "grad_norm": 0.0905720160353948, "learning_rate": 0.00019016605729322433, "loss": 0.8542, "step": 45630 }, { "epoch": 0.8012781123264102, "grad_norm": 0.06798408775831243, "learning_rate": 0.00019016121471401496, "loss": 0.8536, "step": 45640 }, { "epoch": 0.8014536772064116, "grad_norm": 0.06479405117241491, "learning_rate": 0.00019015637100511587, "loss": 0.8484, "step": 45650 }, { "epoch": 0.8016292420864131, "grad_norm": 0.06962596196800161, "learning_rate": 0.00019015152616658852, "loss": 0.8496, "step": 45660 }, { "epoch": 0.8018048069664144, "grad_norm": 0.05599595964530864, "learning_rate": 0.00019014668019849424, "loss": 0.8548, "step": 45670 }, { "epoch": 0.8019803718464158, "grad_norm": 0.06960025740083006, "learning_rate": 0.00019014183310089443, "loss": 0.8499, "step": 45680 }, { "epoch": 0.8021559367264173, "grad_norm": 0.07668702677062234, "learning_rate": 0.00019013698487385053, "loss": 0.8555, "step": 45690 }, { "epoch": 0.8023315016064186, "grad_norm": 0.060637761563868235, "learning_rate": 0.00019013213551742392, "loss": 0.8467, "step": 45700 }, { "epoch": 0.80250706648642, "grad_norm": 0.06515311993243673, "learning_rate": 0.00019012728503167608, "loss": 0.855, "step": 45710 }, { "epoch": 0.8026826313664215, "grad_norm": 0.0617348418924647, "learning_rate": 0.00019012243341666845, "loss": 0.8621, "step": 45720 }, { "epoch": 0.8028581962464229, "grad_norm": 0.07433422568913817, "learning_rate": 0.00019011758067246252, "loss": 0.8471, "step": 45730 }, { "epoch": 0.8030337611264242, "grad_norm": 0.062126874546975276, "learning_rate": 0.00019011272679911976, "loss": 0.8498, "step": 45740 }, { "epoch": 0.8032093260064257, "grad_norm": 0.07438048193656709, "learning_rate": 0.00019010787179670163, "loss": 0.8475, "step": 45750 }, { "epoch": 0.8033848908864271, "grad_norm": 0.06045005832650598, "learning_rate": 0.0001901030156652698, "loss": 0.8555, "step": 45760 }, { "epoch": 0.8035604557664285, "grad_norm": 0.062183013856550264, "learning_rate": 0.0001900981584048856, "loss": 0.857, "step": 45770 }, { "epoch": 0.8037360206464299, "grad_norm": 0.08471155438516917, "learning_rate": 0.00019009330001561065, "loss": 0.8529, "step": 45780 }, { "epoch": 0.8039115855264313, "grad_norm": 0.0688981906475038, "learning_rate": 0.00019008844049750654, "loss": 0.858, "step": 45790 }, { "epoch": 0.8040871504064327, "grad_norm": 0.05868312393779347, "learning_rate": 0.00019008357985063484, "loss": 0.8646, "step": 45800 }, { "epoch": 0.8042627152864341, "grad_norm": 0.08072604235374833, "learning_rate": 0.00019007871807505706, "loss": 0.8539, "step": 45810 }, { "epoch": 0.8044382801664355, "grad_norm": 0.06845785003273397, "learning_rate": 0.00019007385517083486, "loss": 0.8495, "step": 45820 }, { "epoch": 0.804613845046437, "grad_norm": 0.07219590692821656, "learning_rate": 0.00019006899113802988, "loss": 0.852, "step": 45830 }, { "epoch": 0.8047894099264383, "grad_norm": 0.06302554500108988, "learning_rate": 0.00019006412597670367, "loss": 0.8509, "step": 45840 }, { "epoch": 0.8049649748064397, "grad_norm": 0.09711393393366734, "learning_rate": 0.00019005925968691793, "loss": 0.8557, "step": 45850 }, { "epoch": 0.8051405396864412, "grad_norm": 0.06239515712198112, "learning_rate": 0.0001900543922687343, "loss": 0.8559, "step": 45860 }, { "epoch": 0.8053161045664425, "grad_norm": 0.06423551032948352, "learning_rate": 0.00019004952372221446, "loss": 0.8558, "step": 45870 }, { "epoch": 0.8054916694464439, "grad_norm": 0.06998223903054362, "learning_rate": 0.0001900446540474201, "loss": 0.8511, "step": 45880 }, { "epoch": 0.8056672343264454, "grad_norm": 0.07519542701299065, "learning_rate": 0.0001900397832444129, "loss": 0.8478, "step": 45890 }, { "epoch": 0.8058427992064467, "grad_norm": 0.055711281749932666, "learning_rate": 0.00019003491131325454, "loss": 0.856, "step": 45900 }, { "epoch": 0.8060183640864481, "grad_norm": 0.07984023744053496, "learning_rate": 0.00019003003825400684, "loss": 0.8529, "step": 45910 }, { "epoch": 0.8061939289664496, "grad_norm": 0.05884499549278109, "learning_rate": 0.00019002516406673147, "loss": 0.8518, "step": 45920 }, { "epoch": 0.806369493846451, "grad_norm": 0.09758703691653474, "learning_rate": 0.00019002028875149025, "loss": 0.8395, "step": 45930 }, { "epoch": 0.8065450587264523, "grad_norm": 0.05818485183307461, "learning_rate": 0.00019001541230834489, "loss": 0.8493, "step": 45940 }, { "epoch": 0.8067206236064538, "grad_norm": 0.07467009776160706, "learning_rate": 0.00019001053473735718, "loss": 0.8437, "step": 45950 }, { "epoch": 0.8068961884864552, "grad_norm": 0.06436428824673444, "learning_rate": 0.00019000565603858894, "loss": 0.8562, "step": 45960 }, { "epoch": 0.8070717533664565, "grad_norm": 0.059840978988384086, "learning_rate": 0.00019000077621210198, "loss": 0.8604, "step": 45970 }, { "epoch": 0.807247318246458, "grad_norm": 0.06514624371310407, "learning_rate": 0.00018999589525795814, "loss": 0.8568, "step": 45980 }, { "epoch": 0.8074228831264594, "grad_norm": 0.08648604944448783, "learning_rate": 0.0001899910131762192, "loss": 0.8502, "step": 45990 }, { "epoch": 0.8075984480064607, "grad_norm": 0.06462220560216052, "learning_rate": 0.00018998612996694712, "loss": 0.8519, "step": 46000 }, { "epoch": 0.8077740128864622, "grad_norm": 0.06157552225329187, "learning_rate": 0.0001899812456302037, "loss": 0.8623, "step": 46010 }, { "epoch": 0.8079495777664636, "grad_norm": 0.0592896795163793, "learning_rate": 0.00018997636016605089, "loss": 0.8482, "step": 46020 }, { "epoch": 0.808125142646465, "grad_norm": 0.0774906996768122, "learning_rate": 0.0001899714735745505, "loss": 0.85, "step": 46030 }, { "epoch": 0.8083007075264664, "grad_norm": 0.06641715380737508, "learning_rate": 0.0001899665858557645, "loss": 0.8584, "step": 46040 }, { "epoch": 0.8084762724064678, "grad_norm": 0.04446826120888274, "learning_rate": 0.00018996169700975478, "loss": 0.8557, "step": 46050 }, { "epoch": 0.8086518372864692, "grad_norm": 0.056947636781394105, "learning_rate": 0.00018995680703658334, "loss": 0.8457, "step": 46060 }, { "epoch": 0.8088274021664706, "grad_norm": 0.05127344980934396, "learning_rate": 0.0001899519159363121, "loss": 0.8577, "step": 46070 }, { "epoch": 0.809002967046472, "grad_norm": 0.06168734081101779, "learning_rate": 0.00018994702370900307, "loss": 0.8466, "step": 46080 }, { "epoch": 0.8091785319264734, "grad_norm": 0.07757152856414536, "learning_rate": 0.00018994213035471814, "loss": 0.8453, "step": 46090 }, { "epoch": 0.8093540968064749, "grad_norm": 0.0663690580983604, "learning_rate": 0.00018993723587351941, "loss": 0.8495, "step": 46100 }, { "epoch": 0.8095296616864762, "grad_norm": 0.05425441203936288, "learning_rate": 0.00018993234026546887, "loss": 0.855, "step": 46110 }, { "epoch": 0.8097052265664776, "grad_norm": 0.0661423712070176, "learning_rate": 0.0001899274435306285, "loss": 0.8519, "step": 46120 }, { "epoch": 0.8098807914464791, "grad_norm": 0.059588850809817774, "learning_rate": 0.0001899225456690604, "loss": 0.8546, "step": 46130 }, { "epoch": 0.8100563563264804, "grad_norm": 0.05379269660924585, "learning_rate": 0.00018991764668082657, "loss": 0.8504, "step": 46140 }, { "epoch": 0.8102319212064819, "grad_norm": 0.06511348152017496, "learning_rate": 0.00018991274656598916, "loss": 0.8482, "step": 46150 }, { "epoch": 0.8104074860864833, "grad_norm": 0.11496745206431154, "learning_rate": 0.00018990784532461018, "loss": 0.8448, "step": 46160 }, { "epoch": 0.8105830509664846, "grad_norm": 0.060175012557130585, "learning_rate": 0.00018990294295675178, "loss": 0.8555, "step": 46170 }, { "epoch": 0.8107586158464861, "grad_norm": 0.09133606199171163, "learning_rate": 0.00018989803946247604, "loss": 0.85, "step": 46180 }, { "epoch": 0.8109341807264875, "grad_norm": 0.06552692364055855, "learning_rate": 0.0001898931348418451, "loss": 0.8542, "step": 46190 }, { "epoch": 0.8111097456064889, "grad_norm": 0.0506313239200749, "learning_rate": 0.00018988822909492116, "loss": 0.8539, "step": 46200 }, { "epoch": 0.8112853104864903, "grad_norm": 0.055178451795513755, "learning_rate": 0.00018988332222176626, "loss": 0.8559, "step": 46210 }, { "epoch": 0.8114608753664917, "grad_norm": 0.08336674919628562, "learning_rate": 0.00018987841422244263, "loss": 0.8558, "step": 46220 }, { "epoch": 0.8116364402464931, "grad_norm": 0.06613109408248742, "learning_rate": 0.0001898735050970125, "loss": 0.8528, "step": 46230 }, { "epoch": 0.8118120051264945, "grad_norm": 0.05368596252693342, "learning_rate": 0.00018986859484553803, "loss": 0.8509, "step": 46240 }, { "epoch": 0.8119875700064959, "grad_norm": 0.07274600966979473, "learning_rate": 0.0001898636834680814, "loss": 0.845, "step": 46250 }, { "epoch": 0.8121631348864973, "grad_norm": 0.05408585885989997, "learning_rate": 0.0001898587709647049, "loss": 0.8664, "step": 46260 }, { "epoch": 0.8123386997664988, "grad_norm": 0.06368971279783388, "learning_rate": 0.0001898538573354707, "loss": 0.8506, "step": 46270 }, { "epoch": 0.8125142646465001, "grad_norm": 0.06683808065200275, "learning_rate": 0.0001898489425804411, "loss": 0.8559, "step": 46280 }, { "epoch": 0.8126898295265015, "grad_norm": 0.0664593878067276, "learning_rate": 0.00018984402669967838, "loss": 0.8481, "step": 46290 }, { "epoch": 0.812865394406503, "grad_norm": 0.09066985591978259, "learning_rate": 0.00018983910969324485, "loss": 0.8535, "step": 46300 }, { "epoch": 0.8130409592865043, "grad_norm": 0.07055927716437695, "learning_rate": 0.00018983419156120272, "loss": 0.8554, "step": 46310 }, { "epoch": 0.8132165241665057, "grad_norm": 0.0683822843313708, "learning_rate": 0.0001898292723036144, "loss": 0.8583, "step": 46320 }, { "epoch": 0.8133920890465072, "grad_norm": 0.07174044593998855, "learning_rate": 0.00018982435192054216, "loss": 0.8521, "step": 46330 }, { "epoch": 0.8135676539265085, "grad_norm": 0.09296313581021926, "learning_rate": 0.00018981943041204838, "loss": 0.8447, "step": 46340 }, { "epoch": 0.8137432188065099, "grad_norm": 0.07905175687380822, "learning_rate": 0.0001898145077781954, "loss": 0.8606, "step": 46350 }, { "epoch": 0.8139187836865114, "grad_norm": 0.0669024680036709, "learning_rate": 0.00018980958401904554, "loss": 0.8569, "step": 46360 }, { "epoch": 0.8140943485665127, "grad_norm": 0.07491727357700086, "learning_rate": 0.00018980465913466125, "loss": 0.8498, "step": 46370 }, { "epoch": 0.8142699134465141, "grad_norm": 0.08247727238434298, "learning_rate": 0.00018979973312510488, "loss": 0.8556, "step": 46380 }, { "epoch": 0.8144454783265156, "grad_norm": 0.07121264764651228, "learning_rate": 0.00018979480599043893, "loss": 0.857, "step": 46390 }, { "epoch": 0.814621043206517, "grad_norm": 0.05457497769029417, "learning_rate": 0.00018978987773072574, "loss": 0.8494, "step": 46400 }, { "epoch": 0.8147966080865183, "grad_norm": 0.05328847301085295, "learning_rate": 0.00018978494834602776, "loss": 0.848, "step": 46410 }, { "epoch": 0.8149721729665198, "grad_norm": 0.061759649156685546, "learning_rate": 0.00018978001783640752, "loss": 0.85, "step": 46420 }, { "epoch": 0.8151477378465212, "grad_norm": 0.0588616526254329, "learning_rate": 0.00018977508620192742, "loss": 0.8516, "step": 46430 }, { "epoch": 0.8153233027265225, "grad_norm": 0.06488098197148062, "learning_rate": 0.00018977015344264994, "loss": 0.851, "step": 46440 }, { "epoch": 0.815498867606524, "grad_norm": 0.06466759075810592, "learning_rate": 0.00018976521955863765, "loss": 0.8575, "step": 46450 }, { "epoch": 0.8156744324865254, "grad_norm": 0.06653023457695677, "learning_rate": 0.000189760284549953, "loss": 0.8594, "step": 46460 }, { "epoch": 0.8158499973665267, "grad_norm": 0.06644839093451302, "learning_rate": 0.00018975534841665848, "loss": 0.8525, "step": 46470 }, { "epoch": 0.8160255622465282, "grad_norm": 0.0491603682529446, "learning_rate": 0.00018975041115881674, "loss": 0.8547, "step": 46480 }, { "epoch": 0.8162011271265296, "grad_norm": 0.06412496344829843, "learning_rate": 0.00018974547277649027, "loss": 0.8576, "step": 46490 }, { "epoch": 0.8163766920065311, "grad_norm": 0.06739642489738054, "learning_rate": 0.00018974053326974163, "loss": 0.8583, "step": 46500 }, { "epoch": 0.8165522568865324, "grad_norm": 0.07602797873970307, "learning_rate": 0.00018973559263863347, "loss": 0.8549, "step": 46510 }, { "epoch": 0.8167278217665338, "grad_norm": 0.07027213539478283, "learning_rate": 0.00018973065088322832, "loss": 0.851, "step": 46520 }, { "epoch": 0.8169033866465353, "grad_norm": 0.06884519127119716, "learning_rate": 0.0001897257080035888, "loss": 0.8556, "step": 46530 }, { "epoch": 0.8170789515265366, "grad_norm": 0.0900592303468677, "learning_rate": 0.00018972076399977758, "loss": 0.8506, "step": 46540 }, { "epoch": 0.817254516406538, "grad_norm": 0.0786951425220957, "learning_rate": 0.0001897158188718573, "loss": 0.8538, "step": 46550 }, { "epoch": 0.8174300812865395, "grad_norm": 0.06821753008568109, "learning_rate": 0.00018971087261989057, "loss": 0.8489, "step": 46560 }, { "epoch": 0.8176056461665409, "grad_norm": 0.0554611762104938, "learning_rate": 0.00018970592524394003, "loss": 0.8476, "step": 46570 }, { "epoch": 0.8177812110465422, "grad_norm": 0.05375327186207906, "learning_rate": 0.00018970097674406847, "loss": 0.8532, "step": 46580 }, { "epoch": 0.8179567759265437, "grad_norm": 0.07623888841753557, "learning_rate": 0.00018969602712033855, "loss": 0.8531, "step": 46590 }, { "epoch": 0.8181323408065451, "grad_norm": 0.07209546840059503, "learning_rate": 0.00018969107637281296, "loss": 0.8457, "step": 46600 }, { "epoch": 0.8183079056865464, "grad_norm": 0.06387397050678097, "learning_rate": 0.00018968612450155444, "loss": 0.8464, "step": 46610 }, { "epoch": 0.8184834705665479, "grad_norm": 0.08632149518400693, "learning_rate": 0.0001896811715066257, "loss": 0.8427, "step": 46620 }, { "epoch": 0.8186590354465493, "grad_norm": 0.04925674758261725, "learning_rate": 0.00018967621738808952, "loss": 0.8559, "step": 46630 }, { "epoch": 0.8188346003265506, "grad_norm": 0.05249711930504795, "learning_rate": 0.00018967126214600868, "loss": 0.8567, "step": 46640 }, { "epoch": 0.8190101652065521, "grad_norm": 0.05559878862568373, "learning_rate": 0.00018966630578044592, "loss": 0.8555, "step": 46650 }, { "epoch": 0.8191857300865535, "grad_norm": 0.07547487101227267, "learning_rate": 0.00018966134829146415, "loss": 0.8582, "step": 46660 }, { "epoch": 0.8193612949665549, "grad_norm": 0.06570766306806376, "learning_rate": 0.00018965638967912603, "loss": 0.8467, "step": 46670 }, { "epoch": 0.8195368598465563, "grad_norm": 0.11626975920103669, "learning_rate": 0.00018965142994349453, "loss": 0.8511, "step": 46680 }, { "epoch": 0.8197124247265577, "grad_norm": 0.05474368290740394, "learning_rate": 0.0001896464690846324, "loss": 0.8477, "step": 46690 }, { "epoch": 0.8198879896065591, "grad_norm": 0.06385719410362763, "learning_rate": 0.0001896415071026025, "loss": 0.8533, "step": 46700 }, { "epoch": 0.8200635544865605, "grad_norm": 0.0923714158351842, "learning_rate": 0.00018963654399746773, "loss": 0.8491, "step": 46710 }, { "epoch": 0.8202391193665619, "grad_norm": 0.06365559006995453, "learning_rate": 0.00018963157976929093, "loss": 0.8432, "step": 46720 }, { "epoch": 0.8204146842465633, "grad_norm": 0.06302473207033202, "learning_rate": 0.00018962661441813505, "loss": 0.8525, "step": 46730 }, { "epoch": 0.8205902491265648, "grad_norm": 0.05058420249561633, "learning_rate": 0.000189621647944063, "loss": 0.8451, "step": 46740 }, { "epoch": 0.8207658140065661, "grad_norm": 0.07301534961703443, "learning_rate": 0.00018961668034713766, "loss": 0.8495, "step": 46750 }, { "epoch": 0.8209413788865675, "grad_norm": 0.06499321177916682, "learning_rate": 0.000189611711627422, "loss": 0.8519, "step": 46760 }, { "epoch": 0.821116943766569, "grad_norm": 0.05946538615108797, "learning_rate": 0.00018960674178497897, "loss": 0.851, "step": 46770 }, { "epoch": 0.8212925086465703, "grad_norm": 0.08544450713699828, "learning_rate": 0.00018960177081987157, "loss": 0.8444, "step": 46780 }, { "epoch": 0.8214680735265717, "grad_norm": 0.0857770588339982, "learning_rate": 0.0001895967987321627, "loss": 0.8528, "step": 46790 }, { "epoch": 0.8216436384065732, "grad_norm": 0.07769071916905061, "learning_rate": 0.00018959182552191547, "loss": 0.8602, "step": 46800 }, { "epoch": 0.8218192032865745, "grad_norm": 0.055583775372724104, "learning_rate": 0.0001895868511891928, "loss": 0.8554, "step": 46810 }, { "epoch": 0.8219947681665759, "grad_norm": 0.09093404250839464, "learning_rate": 0.00018958187573405773, "loss": 0.8537, "step": 46820 }, { "epoch": 0.8221703330465774, "grad_norm": 0.061864116126272406, "learning_rate": 0.00018957689915657336, "loss": 0.8533, "step": 46830 }, { "epoch": 0.8223458979265788, "grad_norm": 0.056774982103093626, "learning_rate": 0.00018957192145680266, "loss": 0.8544, "step": 46840 }, { "epoch": 0.8225214628065801, "grad_norm": 0.06072434128807315, "learning_rate": 0.00018956694263480879, "loss": 0.8528, "step": 46850 }, { "epoch": 0.8226970276865816, "grad_norm": 0.09045058604369324, "learning_rate": 0.00018956196269065474, "loss": 0.8635, "step": 46860 }, { "epoch": 0.822872592566583, "grad_norm": 0.12291024021519797, "learning_rate": 0.00018955698162440368, "loss": 0.854, "step": 46870 }, { "epoch": 0.8230481574465844, "grad_norm": 0.10524487235125672, "learning_rate": 0.00018955199943611865, "loss": 0.8613, "step": 46880 }, { "epoch": 0.8232237223265858, "grad_norm": 0.06086763701661653, "learning_rate": 0.00018954701612586286, "loss": 0.851, "step": 46890 }, { "epoch": 0.8233992872065872, "grad_norm": 0.06502430575816841, "learning_rate": 0.00018954203169369938, "loss": 0.8526, "step": 46900 }, { "epoch": 0.8235748520865886, "grad_norm": 0.07456525943384433, "learning_rate": 0.0001895370461396914, "loss": 0.8614, "step": 46910 }, { "epoch": 0.82375041696659, "grad_norm": 0.0639397467096804, "learning_rate": 0.00018953205946390208, "loss": 0.846, "step": 46920 }, { "epoch": 0.8239259818465914, "grad_norm": 0.05925219447346283, "learning_rate": 0.00018952707166639457, "loss": 0.848, "step": 46930 }, { "epoch": 0.8241015467265929, "grad_norm": 0.101827744217073, "learning_rate": 0.0001895220827472321, "loss": 0.8469, "step": 46940 }, { "epoch": 0.8242771116065942, "grad_norm": 0.054317499693273014, "learning_rate": 0.00018951709270647793, "loss": 0.8524, "step": 46950 }, { "epoch": 0.8244526764865956, "grad_norm": 0.0513431063817031, "learning_rate": 0.00018951210154419517, "loss": 0.8442, "step": 46960 }, { "epoch": 0.8246282413665971, "grad_norm": 0.09557636288301301, "learning_rate": 0.00018950710926044718, "loss": 0.8473, "step": 46970 }, { "epoch": 0.8248038062465984, "grad_norm": 0.10685392522584745, "learning_rate": 0.0001895021158552971, "loss": 0.8538, "step": 46980 }, { "epoch": 0.8249793711265998, "grad_norm": 0.055512834277074596, "learning_rate": 0.00018949712132880828, "loss": 0.8521, "step": 46990 }, { "epoch": 0.8251549360066013, "grad_norm": 0.058856514896368346, "learning_rate": 0.00018949212568104398, "loss": 0.8597, "step": 47000 }, { "epoch": 0.8253305008866026, "grad_norm": 0.05917809867777741, "learning_rate": 0.00018948712891206745, "loss": 0.8519, "step": 47010 }, { "epoch": 0.825506065766604, "grad_norm": 0.07189503053379322, "learning_rate": 0.0001894821310219421, "loss": 0.8535, "step": 47020 }, { "epoch": 0.8256816306466055, "grad_norm": 0.06764289270821282, "learning_rate": 0.0001894771320107311, "loss": 0.8509, "step": 47030 }, { "epoch": 0.8258571955266069, "grad_norm": 0.059823661055223, "learning_rate": 0.00018947213187849794, "loss": 0.8523, "step": 47040 }, { "epoch": 0.8260327604066082, "grad_norm": 0.05143293988546911, "learning_rate": 0.00018946713062530592, "loss": 0.851, "step": 47050 }, { "epoch": 0.8262083252866097, "grad_norm": 0.05213759215253858, "learning_rate": 0.0001894621282512184, "loss": 0.8541, "step": 47060 }, { "epoch": 0.8263838901666111, "grad_norm": 0.07920603739970973, "learning_rate": 0.00018945712475629874, "loss": 0.8582, "step": 47070 }, { "epoch": 0.8265594550466124, "grad_norm": 0.05306529917732379, "learning_rate": 0.00018945212014061035, "loss": 0.8489, "step": 47080 }, { "epoch": 0.8267350199266139, "grad_norm": 0.06388662872974599, "learning_rate": 0.00018944711440421665, "loss": 0.8565, "step": 47090 }, { "epoch": 0.8269105848066153, "grad_norm": 0.0733585877991864, "learning_rate": 0.00018944210754718107, "loss": 0.8536, "step": 47100 }, { "epoch": 0.8270861496866166, "grad_norm": 0.0893423101182945, "learning_rate": 0.00018943709956956706, "loss": 0.8499, "step": 47110 }, { "epoch": 0.8272617145666181, "grad_norm": 0.06117599564128448, "learning_rate": 0.000189432090471438, "loss": 0.8486, "step": 47120 }, { "epoch": 0.8274372794466195, "grad_norm": 0.05345631581234062, "learning_rate": 0.00018942708025285743, "loss": 0.8599, "step": 47130 }, { "epoch": 0.8276128443266209, "grad_norm": 0.07153033672454288, "learning_rate": 0.00018942206891388883, "loss": 0.8495, "step": 47140 }, { "epoch": 0.8277884092066223, "grad_norm": 0.0613240341966764, "learning_rate": 0.00018941705645459563, "loss": 0.851, "step": 47150 }, { "epoch": 0.8279639740866237, "grad_norm": 0.09618872654848758, "learning_rate": 0.00018941204287504143, "loss": 0.8582, "step": 47160 }, { "epoch": 0.8281395389666251, "grad_norm": 0.07457352372633538, "learning_rate": 0.00018940702817528965, "loss": 0.8568, "step": 47170 }, { "epoch": 0.8283151038466265, "grad_norm": 0.05121564347886171, "learning_rate": 0.00018940201235540392, "loss": 0.8468, "step": 47180 }, { "epoch": 0.8284906687266279, "grad_norm": 0.07277432216715846, "learning_rate": 0.00018939699541544773, "loss": 0.854, "step": 47190 }, { "epoch": 0.8286662336066293, "grad_norm": 0.08359123319715725, "learning_rate": 0.00018939197735548467, "loss": 0.8505, "step": 47200 }, { "epoch": 0.8288417984866308, "grad_norm": 0.05689633741843706, "learning_rate": 0.00018938695817557832, "loss": 0.8545, "step": 47210 }, { "epoch": 0.8290173633666321, "grad_norm": 0.05071649776419942, "learning_rate": 0.00018938193787579227, "loss": 0.848, "step": 47220 }, { "epoch": 0.8291929282466336, "grad_norm": 0.0570919688560704, "learning_rate": 0.00018937691645619012, "loss": 0.862, "step": 47230 }, { "epoch": 0.829368493126635, "grad_norm": 0.05575275238720907, "learning_rate": 0.00018937189391683552, "loss": 0.8506, "step": 47240 }, { "epoch": 0.8295440580066363, "grad_norm": 0.07774245537941749, "learning_rate": 0.00018936687025779205, "loss": 0.8538, "step": 47250 }, { "epoch": 0.8297196228866378, "grad_norm": 0.08521369517396116, "learning_rate": 0.00018936184547912343, "loss": 0.849, "step": 47260 }, { "epoch": 0.8298951877666392, "grad_norm": 0.06826567445218286, "learning_rate": 0.00018935681958089329, "loss": 0.8508, "step": 47270 }, { "epoch": 0.8300707526466405, "grad_norm": 0.05248410465172415, "learning_rate": 0.00018935179256316528, "loss": 0.8466, "step": 47280 }, { "epoch": 0.830246317526642, "grad_norm": 0.045421752031187895, "learning_rate": 0.0001893467644260032, "loss": 0.8534, "step": 47290 }, { "epoch": 0.8304218824066434, "grad_norm": 0.05964232454685994, "learning_rate": 0.00018934173516947062, "loss": 0.8588, "step": 47300 }, { "epoch": 0.8305974472866448, "grad_norm": 0.07603719215337497, "learning_rate": 0.00018933670479363135, "loss": 0.8491, "step": 47310 }, { "epoch": 0.8307730121666462, "grad_norm": 0.05563556164514465, "learning_rate": 0.0001893316732985491, "loss": 0.8471, "step": 47320 }, { "epoch": 0.8309485770466476, "grad_norm": 0.05345872921400778, "learning_rate": 0.00018932664068428763, "loss": 0.8577, "step": 47330 }, { "epoch": 0.831124141926649, "grad_norm": 0.07456320629325344, "learning_rate": 0.00018932160695091065, "loss": 0.8557, "step": 47340 }, { "epoch": 0.8312997068066504, "grad_norm": 0.06870046593907769, "learning_rate": 0.000189316572098482, "loss": 0.8585, "step": 47350 }, { "epoch": 0.8314752716866518, "grad_norm": 0.05672111949170691, "learning_rate": 0.0001893115361270655, "loss": 0.8484, "step": 47360 }, { "epoch": 0.8316508365666532, "grad_norm": 0.07277531710607255, "learning_rate": 0.00018930649903672487, "loss": 0.86, "step": 47370 }, { "epoch": 0.8318264014466547, "grad_norm": 0.06294454113462596, "learning_rate": 0.00018930146082752396, "loss": 0.8594, "step": 47380 }, { "epoch": 0.832001966326656, "grad_norm": 0.05524929616482924, "learning_rate": 0.00018929642149952667, "loss": 0.8546, "step": 47390 }, { "epoch": 0.8321775312066574, "grad_norm": 0.06522904174290492, "learning_rate": 0.00018929138105279678, "loss": 0.8508, "step": 47400 }, { "epoch": 0.8323530960866589, "grad_norm": 0.06582262596064668, "learning_rate": 0.00018928633948739814, "loss": 0.8495, "step": 47410 }, { "epoch": 0.8325286609666602, "grad_norm": 0.06557657297444502, "learning_rate": 0.00018928129680339468, "loss": 0.8525, "step": 47420 }, { "epoch": 0.8327042258466616, "grad_norm": 0.052968359982008134, "learning_rate": 0.00018927625300085027, "loss": 0.8488, "step": 47430 }, { "epoch": 0.8328797907266631, "grad_norm": 0.054994853056666855, "learning_rate": 0.0001892712080798288, "loss": 0.8493, "step": 47440 }, { "epoch": 0.8330553556066644, "grad_norm": 0.05280822653741672, "learning_rate": 0.00018926616204039423, "loss": 0.8633, "step": 47450 }, { "epoch": 0.8332309204866658, "grad_norm": 0.06455053264293038, "learning_rate": 0.00018926111488261048, "loss": 0.8575, "step": 47460 }, { "epoch": 0.8334064853666673, "grad_norm": 0.057322035361754384, "learning_rate": 0.00018925606660654147, "loss": 0.8454, "step": 47470 }, { "epoch": 0.8335820502466686, "grad_norm": 0.062192408093644715, "learning_rate": 0.0001892510172122512, "loss": 0.8495, "step": 47480 }, { "epoch": 0.83375761512667, "grad_norm": 0.07002624974889766, "learning_rate": 0.00018924596669980362, "loss": 0.855, "step": 47490 }, { "epoch": 0.8339331800066715, "grad_norm": 0.05100828877780506, "learning_rate": 0.0001892409150692627, "loss": 0.8544, "step": 47500 }, { "epoch": 0.8341087448866729, "grad_norm": 0.06548445013709236, "learning_rate": 0.0001892358623206925, "loss": 0.8563, "step": 47510 }, { "epoch": 0.8342843097666742, "grad_norm": 0.1091812068044391, "learning_rate": 0.00018923080845415703, "loss": 0.8509, "step": 47520 }, { "epoch": 0.8344598746466757, "grad_norm": 0.07494999014990837, "learning_rate": 0.00018922575346972027, "loss": 0.8451, "step": 47530 }, { "epoch": 0.8346354395266771, "grad_norm": 0.0718689644197126, "learning_rate": 0.00018922069736744632, "loss": 0.8464, "step": 47540 }, { "epoch": 0.8348110044066784, "grad_norm": 0.07561613869229418, "learning_rate": 0.00018921564014739926, "loss": 0.8585, "step": 47550 }, { "epoch": 0.8349865692866799, "grad_norm": 0.062157002888636216, "learning_rate": 0.0001892105818096431, "loss": 0.8494, "step": 47560 }, { "epoch": 0.8351621341666813, "grad_norm": 0.051097607232915204, "learning_rate": 0.00018920552235424196, "loss": 0.8651, "step": 47570 }, { "epoch": 0.8353376990466826, "grad_norm": 0.06537968911808296, "learning_rate": 0.00018920046178125995, "loss": 0.8592, "step": 47580 }, { "epoch": 0.8355132639266841, "grad_norm": 0.06643674928046758, "learning_rate": 0.0001891954000907612, "loss": 0.8511, "step": 47590 }, { "epoch": 0.8356888288066855, "grad_norm": 0.05715773863786495, "learning_rate": 0.0001891903372828098, "loss": 0.8492, "step": 47600 }, { "epoch": 0.835864393686687, "grad_norm": 0.05653626934545264, "learning_rate": 0.00018918527335746993, "loss": 0.8563, "step": 47610 }, { "epoch": 0.8360399585666883, "grad_norm": 0.06068449756037767, "learning_rate": 0.0001891802083148058, "loss": 0.8491, "step": 47620 }, { "epoch": 0.8362155234466897, "grad_norm": 0.07914548372875925, "learning_rate": 0.00018917514215488148, "loss": 0.8524, "step": 47630 }, { "epoch": 0.8363910883266912, "grad_norm": 0.0846850791275102, "learning_rate": 0.00018917007487776122, "loss": 0.8546, "step": 47640 }, { "epoch": 0.8365666532066925, "grad_norm": 0.05719690953990907, "learning_rate": 0.00018916500648350921, "loss": 0.8524, "step": 47650 }, { "epoch": 0.8367422180866939, "grad_norm": 0.0800927039186236, "learning_rate": 0.00018915993697218966, "loss": 0.8499, "step": 47660 }, { "epoch": 0.8369177829666954, "grad_norm": 0.07140091198003722, "learning_rate": 0.00018915486634386684, "loss": 0.8562, "step": 47670 }, { "epoch": 0.8370933478466968, "grad_norm": 0.07856612912579983, "learning_rate": 0.00018914979459860496, "loss": 0.8564, "step": 47680 }, { "epoch": 0.8372689127266981, "grad_norm": 0.07666135405575163, "learning_rate": 0.00018914472173646828, "loss": 0.8518, "step": 47690 }, { "epoch": 0.8374444776066996, "grad_norm": 0.07288172798478493, "learning_rate": 0.00018913964775752108, "loss": 0.8519, "step": 47700 }, { "epoch": 0.837620042486701, "grad_norm": 0.05239959855927765, "learning_rate": 0.0001891345726618277, "loss": 0.8538, "step": 47710 }, { "epoch": 0.8377956073667023, "grad_norm": 0.05630675859985191, "learning_rate": 0.00018912949644945233, "loss": 0.8484, "step": 47720 }, { "epoch": 0.8379711722467038, "grad_norm": 0.06699310627732896, "learning_rate": 0.00018912441912045935, "loss": 0.8635, "step": 47730 }, { "epoch": 0.8381467371267052, "grad_norm": 0.09937560022934527, "learning_rate": 0.00018911934067491313, "loss": 0.8474, "step": 47740 }, { "epoch": 0.8383223020067065, "grad_norm": 0.07188212124129935, "learning_rate": 0.00018911426111287795, "loss": 0.8534, "step": 47750 }, { "epoch": 0.838497866886708, "grad_norm": 0.07552528276141186, "learning_rate": 0.00018910918043441822, "loss": 0.862, "step": 47760 }, { "epoch": 0.8386734317667094, "grad_norm": 0.047259683727201254, "learning_rate": 0.00018910409863959828, "loss": 0.8501, "step": 47770 }, { "epoch": 0.8388489966467108, "grad_norm": 0.06876500706262438, "learning_rate": 0.0001890990157284825, "loss": 0.8476, "step": 47780 }, { "epoch": 0.8390245615267122, "grad_norm": 0.057148324237052905, "learning_rate": 0.00018909393170113534, "loss": 0.856, "step": 47790 }, { "epoch": 0.8392001264067136, "grad_norm": 0.05238798926996781, "learning_rate": 0.00018908884655762117, "loss": 0.8477, "step": 47800 }, { "epoch": 0.839375691286715, "grad_norm": 0.06748613564255113, "learning_rate": 0.0001890837602980044, "loss": 0.8536, "step": 47810 }, { "epoch": 0.8395512561667164, "grad_norm": 0.06378335427244072, "learning_rate": 0.00018907867292234956, "loss": 0.8627, "step": 47820 }, { "epoch": 0.8397268210467178, "grad_norm": 0.09252113317017736, "learning_rate": 0.00018907358443072102, "loss": 0.8478, "step": 47830 }, { "epoch": 0.8399023859267192, "grad_norm": 0.06819248528190816, "learning_rate": 0.0001890684948231833, "loss": 0.8494, "step": 47840 }, { "epoch": 0.8400779508067207, "grad_norm": 0.05787257774764161, "learning_rate": 0.00018906340409980085, "loss": 0.8549, "step": 47850 }, { "epoch": 0.840253515686722, "grad_norm": 0.056192234492235926, "learning_rate": 0.00018905831226063822, "loss": 0.8548, "step": 47860 }, { "epoch": 0.8404290805667234, "grad_norm": 0.05164059613417292, "learning_rate": 0.00018905321930575987, "loss": 0.8495, "step": 47870 }, { "epoch": 0.8406046454467249, "grad_norm": 0.06701307710100989, "learning_rate": 0.0001890481252352304, "loss": 0.8552, "step": 47880 }, { "epoch": 0.8407802103267262, "grad_norm": 0.053163364422305785, "learning_rate": 0.00018904303004911428, "loss": 0.8606, "step": 47890 }, { "epoch": 0.8409557752067276, "grad_norm": 0.08250539753370305, "learning_rate": 0.0001890379337474761, "loss": 0.8558, "step": 47900 }, { "epoch": 0.8411313400867291, "grad_norm": 0.06342956315242838, "learning_rate": 0.0001890328363303804, "loss": 0.857, "step": 47910 }, { "epoch": 0.8413069049667304, "grad_norm": 0.049565943568239484, "learning_rate": 0.00018902773779789182, "loss": 0.8477, "step": 47920 }, { "epoch": 0.8414824698467318, "grad_norm": 0.060826431292887354, "learning_rate": 0.00018902263815007492, "loss": 0.8514, "step": 47930 }, { "epoch": 0.8416580347267333, "grad_norm": 0.11343369692025528, "learning_rate": 0.00018901753738699434, "loss": 0.8531, "step": 47940 }, { "epoch": 0.8418335996067347, "grad_norm": 0.06746706707410802, "learning_rate": 0.00018901243550871472, "loss": 0.8535, "step": 47950 }, { "epoch": 0.8420091644867361, "grad_norm": 0.05956609643218065, "learning_rate": 0.00018900733251530063, "loss": 0.8578, "step": 47960 }, { "epoch": 0.8421847293667375, "grad_norm": 0.11950673032210715, "learning_rate": 0.00018900222840681678, "loss": 0.8495, "step": 47970 }, { "epoch": 0.8423602942467389, "grad_norm": 0.07703795610397475, "learning_rate": 0.00018899712318332782, "loss": 0.8551, "step": 47980 }, { "epoch": 0.8425358591267403, "grad_norm": 0.04675449686301453, "learning_rate": 0.00018899201684489846, "loss": 0.8519, "step": 47990 }, { "epoch": 0.8427114240067417, "grad_norm": 0.05998750487164444, "learning_rate": 0.00018898690939159337, "loss": 0.8574, "step": 48000 }, { "epoch": 0.8428869888867431, "grad_norm": 0.07729410397985514, "learning_rate": 0.00018898180082347726, "loss": 0.8586, "step": 48010 }, { "epoch": 0.8430625537667445, "grad_norm": 0.09954703419178214, "learning_rate": 0.00018897669114061494, "loss": 0.8451, "step": 48020 }, { "epoch": 0.8432381186467459, "grad_norm": 0.1058881403922801, "learning_rate": 0.00018897158034307103, "loss": 0.8469, "step": 48030 }, { "epoch": 0.8434136835267473, "grad_norm": 0.0475617511337497, "learning_rate": 0.00018896646843091035, "loss": 0.8527, "step": 48040 }, { "epoch": 0.8435892484067488, "grad_norm": 0.06207884135915811, "learning_rate": 0.00018896135540419766, "loss": 0.8488, "step": 48050 }, { "epoch": 0.8437648132867501, "grad_norm": 0.0664470276834934, "learning_rate": 0.00018895624126299772, "loss": 0.8459, "step": 48060 }, { "epoch": 0.8439403781667515, "grad_norm": 0.04562696837851839, "learning_rate": 0.00018895112600737538, "loss": 0.8457, "step": 48070 }, { "epoch": 0.844115943046753, "grad_norm": 0.09223218404544382, "learning_rate": 0.00018894600963739538, "loss": 0.8483, "step": 48080 }, { "epoch": 0.8442915079267543, "grad_norm": 0.07869072727331555, "learning_rate": 0.00018894089215312258, "loss": 0.8596, "step": 48090 }, { "epoch": 0.8444670728067557, "grad_norm": 0.06307147327563047, "learning_rate": 0.00018893577355462185, "loss": 0.8501, "step": 48100 }, { "epoch": 0.8446426376867572, "grad_norm": 0.08250733999034059, "learning_rate": 0.000188930653841958, "loss": 0.8589, "step": 48110 }, { "epoch": 0.8448182025667585, "grad_norm": 0.09203451978422257, "learning_rate": 0.00018892553301519594, "loss": 0.8544, "step": 48120 }, { "epoch": 0.8449937674467599, "grad_norm": 0.08590057011378348, "learning_rate": 0.0001889204110744005, "loss": 0.8499, "step": 48130 }, { "epoch": 0.8451693323267614, "grad_norm": 0.06210318313792687, "learning_rate": 0.00018891528801963663, "loss": 0.8539, "step": 48140 }, { "epoch": 0.8453448972067628, "grad_norm": 0.0709859624938539, "learning_rate": 0.00018891016385096918, "loss": 0.8419, "step": 48150 }, { "epoch": 0.8455204620867641, "grad_norm": 0.06541316223319384, "learning_rate": 0.00018890503856846313, "loss": 0.8556, "step": 48160 }, { "epoch": 0.8456960269667656, "grad_norm": 0.0905139313030146, "learning_rate": 0.00018889991217218338, "loss": 0.8507, "step": 48170 }, { "epoch": 0.845871591846767, "grad_norm": 0.05462449082966102, "learning_rate": 0.0001888947846621949, "loss": 0.8467, "step": 48180 }, { "epoch": 0.8460471567267683, "grad_norm": 0.06557041860948322, "learning_rate": 0.00018888965603856267, "loss": 0.848, "step": 48190 }, { "epoch": 0.8462227216067698, "grad_norm": 0.043518409712607335, "learning_rate": 0.0001888845263013516, "loss": 0.856, "step": 48200 }, { "epoch": 0.8463982864867712, "grad_norm": 0.06548515268715119, "learning_rate": 0.00018887939545062679, "loss": 0.8527, "step": 48210 }, { "epoch": 0.8465738513667725, "grad_norm": 0.07209592735140236, "learning_rate": 0.00018887426348645318, "loss": 0.8489, "step": 48220 }, { "epoch": 0.846749416246774, "grad_norm": 0.06115298539679447, "learning_rate": 0.00018886913040889582, "loss": 0.8521, "step": 48230 }, { "epoch": 0.8469249811267754, "grad_norm": 0.06507427688130167, "learning_rate": 0.00018886399621801972, "loss": 0.8587, "step": 48240 }, { "epoch": 0.8471005460067768, "grad_norm": 0.08069562362492906, "learning_rate": 0.00018885886091388995, "loss": 0.8613, "step": 48250 }, { "epoch": 0.8472761108867782, "grad_norm": 0.07723261800417765, "learning_rate": 0.0001888537244965716, "loss": 0.8609, "step": 48260 }, { "epoch": 0.8474516757667796, "grad_norm": 0.11518396115848109, "learning_rate": 0.0001888485869661297, "loss": 0.8545, "step": 48270 }, { "epoch": 0.847627240646781, "grad_norm": 0.06075170931459192, "learning_rate": 0.00018884344832262938, "loss": 0.8559, "step": 48280 }, { "epoch": 0.8478028055267824, "grad_norm": 0.054000722681732845, "learning_rate": 0.00018883830856613573, "loss": 0.854, "step": 48290 }, { "epoch": 0.8479783704067838, "grad_norm": 0.059091077545673165, "learning_rate": 0.0001888331676967139, "loss": 0.846, "step": 48300 }, { "epoch": 0.8481539352867852, "grad_norm": 0.06748344078166037, "learning_rate": 0.00018882802571442897, "loss": 0.854, "step": 48310 }, { "epoch": 0.8483295001667867, "grad_norm": 0.06652518294493157, "learning_rate": 0.0001888228826193461, "loss": 0.843, "step": 48320 }, { "epoch": 0.848505065046788, "grad_norm": 0.08033835461162335, "learning_rate": 0.00018881773841153052, "loss": 0.8552, "step": 48330 }, { "epoch": 0.8486806299267895, "grad_norm": 0.07236344982634965, "learning_rate": 0.00018881259309104736, "loss": 0.856, "step": 48340 }, { "epoch": 0.8488561948067909, "grad_norm": 0.07156636833862395, "learning_rate": 0.00018880744665796182, "loss": 0.8616, "step": 48350 }, { "epoch": 0.8490317596867922, "grad_norm": 0.07523247935301755, "learning_rate": 0.00018880229911233908, "loss": 0.8486, "step": 48360 }, { "epoch": 0.8492073245667937, "grad_norm": 0.07234265918420145, "learning_rate": 0.0001887971504542444, "loss": 0.8471, "step": 48370 }, { "epoch": 0.8493828894467951, "grad_norm": 0.0806312503917523, "learning_rate": 0.00018879200068374296, "loss": 0.843, "step": 48380 }, { "epoch": 0.8495584543267964, "grad_norm": 0.06892892441253065, "learning_rate": 0.0001887868498009001, "loss": 0.86, "step": 48390 }, { "epoch": 0.8497340192067979, "grad_norm": 0.06427252071329209, "learning_rate": 0.000188781697805781, "loss": 0.8521, "step": 48400 }, { "epoch": 0.8499095840867993, "grad_norm": 0.0602552606640196, "learning_rate": 0.00018877654469845096, "loss": 0.849, "step": 48410 }, { "epoch": 0.8500851489668007, "grad_norm": 0.052413305662846646, "learning_rate": 0.00018877139047897528, "loss": 0.8492, "step": 48420 }, { "epoch": 0.8502607138468021, "grad_norm": 0.06172543841830905, "learning_rate": 0.00018876623514741927, "loss": 0.848, "step": 48430 }, { "epoch": 0.8504362787268035, "grad_norm": 0.06468147301090896, "learning_rate": 0.00018876107870384825, "loss": 0.8594, "step": 48440 }, { "epoch": 0.8506118436068049, "grad_norm": 0.06481177404416283, "learning_rate": 0.00018875592114832754, "loss": 0.8512, "step": 48450 }, { "epoch": 0.8507874084868063, "grad_norm": 0.06758501189555792, "learning_rate": 0.00018875076248092248, "loss": 0.8483, "step": 48460 }, { "epoch": 0.8509629733668077, "grad_norm": 0.07721368121010488, "learning_rate": 0.00018874560270169845, "loss": 0.8454, "step": 48470 }, { "epoch": 0.8511385382468091, "grad_norm": 0.0720966680310079, "learning_rate": 0.0001887404418107208, "loss": 0.8574, "step": 48480 }, { "epoch": 0.8513141031268106, "grad_norm": 0.0892230020609019, "learning_rate": 0.00018873527980805494, "loss": 0.8523, "step": 48490 }, { "epoch": 0.8514896680068119, "grad_norm": 0.05738708443025915, "learning_rate": 0.0001887301166937663, "loss": 0.8582, "step": 48500 }, { "epoch": 0.8516652328868133, "grad_norm": 0.06409508425777181, "learning_rate": 0.00018872495246792027, "loss": 0.8604, "step": 48510 }, { "epoch": 0.8518407977668148, "grad_norm": 0.053805326394150886, "learning_rate": 0.00018871978713058224, "loss": 0.8574, "step": 48520 }, { "epoch": 0.8520163626468161, "grad_norm": 0.09158301595534583, "learning_rate": 0.0001887146206818177, "loss": 0.8502, "step": 48530 }, { "epoch": 0.8521919275268175, "grad_norm": 0.06418058719569335, "learning_rate": 0.00018870945312169212, "loss": 0.8513, "step": 48540 }, { "epoch": 0.852367492406819, "grad_norm": 0.0760978214966419, "learning_rate": 0.00018870428445027097, "loss": 0.8483, "step": 48550 }, { "epoch": 0.8525430572868203, "grad_norm": 0.06422963049028697, "learning_rate": 0.00018869911466761968, "loss": 0.8528, "step": 48560 }, { "epoch": 0.8527186221668217, "grad_norm": 0.0796374149606218, "learning_rate": 0.00018869394377380385, "loss": 0.859, "step": 48570 }, { "epoch": 0.8528941870468232, "grad_norm": 0.0545622434360017, "learning_rate": 0.00018868877176888893, "loss": 0.8487, "step": 48580 }, { "epoch": 0.8530697519268245, "grad_norm": 0.08658295846770604, "learning_rate": 0.00018868359865294046, "loss": 0.8544, "step": 48590 }, { "epoch": 0.8532453168068259, "grad_norm": 0.06468235458730677, "learning_rate": 0.000188678424426024, "loss": 0.8548, "step": 48600 }, { "epoch": 0.8534208816868274, "grad_norm": 0.06249217541891776, "learning_rate": 0.00018867324908820513, "loss": 0.8505, "step": 48610 }, { "epoch": 0.8535964465668288, "grad_norm": 0.06611652097596006, "learning_rate": 0.00018866807263954934, "loss": 0.8509, "step": 48620 }, { "epoch": 0.8537720114468301, "grad_norm": 0.0832027570688526, "learning_rate": 0.0001886628950801223, "loss": 0.8629, "step": 48630 }, { "epoch": 0.8539475763268316, "grad_norm": 0.05192774804154658, "learning_rate": 0.00018865771640998958, "loss": 0.8458, "step": 48640 }, { "epoch": 0.854123141206833, "grad_norm": 0.0635379745341515, "learning_rate": 0.00018865253662921677, "loss": 0.853, "step": 48650 }, { "epoch": 0.8542987060868343, "grad_norm": 0.07595029092779879, "learning_rate": 0.0001886473557378695, "loss": 0.8597, "step": 48660 }, { "epoch": 0.8544742709668358, "grad_norm": 0.06861939922776017, "learning_rate": 0.00018864217373601346, "loss": 0.8524, "step": 48670 }, { "epoch": 0.8546498358468372, "grad_norm": 0.046819125027656665, "learning_rate": 0.0001886369906237143, "loss": 0.8593, "step": 48680 }, { "epoch": 0.8548254007268387, "grad_norm": 0.07382798905766637, "learning_rate": 0.00018863180640103766, "loss": 0.8598, "step": 48690 }, { "epoch": 0.85500096560684, "grad_norm": 0.07950287960605623, "learning_rate": 0.00018862662106804926, "loss": 0.8509, "step": 48700 }, { "epoch": 0.8551765304868414, "grad_norm": 0.07595087780881953, "learning_rate": 0.00018862143462481474, "loss": 0.8516, "step": 48710 }, { "epoch": 0.8553520953668429, "grad_norm": 0.05673667081468035, "learning_rate": 0.00018861624707139986, "loss": 0.8515, "step": 48720 }, { "epoch": 0.8555276602468442, "grad_norm": 0.05210360874673874, "learning_rate": 0.00018861105840787038, "loss": 0.8463, "step": 48730 }, { "epoch": 0.8557032251268456, "grad_norm": 0.05929645073633453, "learning_rate": 0.00018860586863429194, "loss": 0.8571, "step": 48740 }, { "epoch": 0.8558787900068471, "grad_norm": 0.050854820294379224, "learning_rate": 0.0001886006777507304, "loss": 0.8509, "step": 48750 }, { "epoch": 0.8560543548868484, "grad_norm": 0.059718452203761346, "learning_rate": 0.00018859548575725146, "loss": 0.8576, "step": 48760 }, { "epoch": 0.8562299197668498, "grad_norm": 0.06350814382251778, "learning_rate": 0.0001885902926539209, "loss": 0.8478, "step": 48770 }, { "epoch": 0.8564054846468513, "grad_norm": 0.05088790850418774, "learning_rate": 0.00018858509844080462, "loss": 0.8517, "step": 48780 }, { "epoch": 0.8565810495268527, "grad_norm": 0.16240285065545768, "learning_rate": 0.0001885799031179683, "loss": 0.855, "step": 48790 }, { "epoch": 0.856756614406854, "grad_norm": 0.07697751034937841, "learning_rate": 0.00018857470668547782, "loss": 0.854, "step": 48800 }, { "epoch": 0.8569321792868555, "grad_norm": 0.06503953927298918, "learning_rate": 0.00018856950914339905, "loss": 0.8467, "step": 48810 }, { "epoch": 0.8571077441668569, "grad_norm": 0.0835643310578262, "learning_rate": 0.0001885643104917978, "loss": 0.8581, "step": 48820 }, { "epoch": 0.8572833090468582, "grad_norm": 0.06812650330842171, "learning_rate": 0.00018855911073074, "loss": 0.8479, "step": 48830 }, { "epoch": 0.8574588739268597, "grad_norm": 0.06480460144371245, "learning_rate": 0.00018855390986029143, "loss": 0.8587, "step": 48840 }, { "epoch": 0.8576344388068611, "grad_norm": 0.0542475871005369, "learning_rate": 0.00018854870788051806, "loss": 0.8453, "step": 48850 }, { "epoch": 0.8578100036868624, "grad_norm": 0.07079363065802077, "learning_rate": 0.0001885435047914858, "loss": 0.8447, "step": 48860 }, { "epoch": 0.8579855685668639, "grad_norm": 0.06282690046457404, "learning_rate": 0.00018853830059326052, "loss": 0.85, "step": 48870 }, { "epoch": 0.8581611334468653, "grad_norm": 0.057426133760762485, "learning_rate": 0.0001885330952859082, "loss": 0.8475, "step": 48880 }, { "epoch": 0.8583366983268667, "grad_norm": 0.06559675437379853, "learning_rate": 0.00018852788886949482, "loss": 0.8588, "step": 48890 }, { "epoch": 0.8585122632068681, "grad_norm": 0.04906119772798667, "learning_rate": 0.00018852268134408632, "loss": 0.8492, "step": 48900 }, { "epoch": 0.8586878280868695, "grad_norm": 0.086849101837544, "learning_rate": 0.00018851747270974862, "loss": 0.8495, "step": 48910 }, { "epoch": 0.8588633929668709, "grad_norm": 0.0530837738279259, "learning_rate": 0.00018851226296654782, "loss": 0.8592, "step": 48920 }, { "epoch": 0.8590389578468723, "grad_norm": 0.07078255096720595, "learning_rate": 0.00018850705211454985, "loss": 0.8569, "step": 48930 }, { "epoch": 0.8592145227268737, "grad_norm": 0.057499243823498976, "learning_rate": 0.00018850184015382077, "loss": 0.8509, "step": 48940 }, { "epoch": 0.8593900876068751, "grad_norm": 0.05513845421797004, "learning_rate": 0.00018849662708442656, "loss": 0.8522, "step": 48950 }, { "epoch": 0.8595656524868766, "grad_norm": 0.07073522021812581, "learning_rate": 0.00018849141290643338, "loss": 0.8495, "step": 48960 }, { "epoch": 0.8597412173668779, "grad_norm": 0.07383646586279723, "learning_rate": 0.0001884861976199072, "loss": 0.8468, "step": 48970 }, { "epoch": 0.8599167822468793, "grad_norm": 0.05664629210241765, "learning_rate": 0.00018848098122491413, "loss": 0.8537, "step": 48980 }, { "epoch": 0.8600923471268808, "grad_norm": 0.07063373916246846, "learning_rate": 0.00018847576372152028, "loss": 0.8573, "step": 48990 }, { "epoch": 0.8602679120068821, "grad_norm": 0.06586540224634967, "learning_rate": 0.00018847054510979174, "loss": 0.8567, "step": 49000 }, { "epoch": 0.8604434768868835, "grad_norm": 0.057944043844559534, "learning_rate": 0.0001884653253897946, "loss": 0.8557, "step": 49010 }, { "epoch": 0.860619041766885, "grad_norm": 0.05788324166384675, "learning_rate": 0.00018846010456159505, "loss": 0.8521, "step": 49020 }, { "epoch": 0.8607946066468863, "grad_norm": 0.08862259518119608, "learning_rate": 0.00018845488262525916, "loss": 0.8479, "step": 49030 }, { "epoch": 0.8609701715268877, "grad_norm": 0.05674427754528213, "learning_rate": 0.00018844965958085322, "loss": 0.8615, "step": 49040 }, { "epoch": 0.8611457364068892, "grad_norm": 0.054331443417079586, "learning_rate": 0.0001884444354284433, "loss": 0.8493, "step": 49050 }, { "epoch": 0.8613213012868906, "grad_norm": 0.055908327047893225, "learning_rate": 0.00018843921016809562, "loss": 0.85, "step": 49060 }, { "epoch": 0.861496866166892, "grad_norm": 0.07243997884314513, "learning_rate": 0.00018843398379987642, "loss": 0.8522, "step": 49070 }, { "epoch": 0.8616724310468934, "grad_norm": 0.10604106335356121, "learning_rate": 0.00018842875632385186, "loss": 0.8585, "step": 49080 }, { "epoch": 0.8618479959268948, "grad_norm": 0.05982790050313992, "learning_rate": 0.0001884235277400882, "loss": 0.8527, "step": 49090 }, { "epoch": 0.8620235608068962, "grad_norm": 0.0858648296732026, "learning_rate": 0.0001884182980486517, "loss": 0.8524, "step": 49100 }, { "epoch": 0.8621991256868976, "grad_norm": 0.07123577021516432, "learning_rate": 0.00018841306724960862, "loss": 0.8487, "step": 49110 }, { "epoch": 0.862374690566899, "grad_norm": 0.09009399942839286, "learning_rate": 0.0001884078353430252, "loss": 0.8384, "step": 49120 }, { "epoch": 0.8625502554469004, "grad_norm": 0.06553789082569897, "learning_rate": 0.0001884026023289678, "loss": 0.8548, "step": 49130 }, { "epoch": 0.8627258203269018, "grad_norm": 0.04590071150377029, "learning_rate": 0.0001883973682075026, "loss": 0.8532, "step": 49140 }, { "epoch": 0.8629013852069032, "grad_norm": 0.10180630883274064, "learning_rate": 0.00018839213297869607, "loss": 0.8596, "step": 49150 }, { "epoch": 0.8630769500869047, "grad_norm": 0.11495616293129815, "learning_rate": 0.00018838689664261443, "loss": 0.8519, "step": 49160 }, { "epoch": 0.863252514966906, "grad_norm": 0.09551178827087767, "learning_rate": 0.00018838165919932408, "loss": 0.8527, "step": 49170 }, { "epoch": 0.8634280798469074, "grad_norm": 0.05571187415674919, "learning_rate": 0.00018837642064889137, "loss": 0.8576, "step": 49180 }, { "epoch": 0.8636036447269089, "grad_norm": 0.05427015232116651, "learning_rate": 0.0001883711809913826, "loss": 0.8477, "step": 49190 }, { "epoch": 0.8637792096069102, "grad_norm": 0.05772438890735048, "learning_rate": 0.0001883659402268643, "loss": 0.8511, "step": 49200 }, { "epoch": 0.8639547744869116, "grad_norm": 0.051077930443854454, "learning_rate": 0.00018836069835540274, "loss": 0.8652, "step": 49210 }, { "epoch": 0.8641303393669131, "grad_norm": 0.05168386271773331, "learning_rate": 0.0001883554553770644, "loss": 0.8511, "step": 49220 }, { "epoch": 0.8643059042469144, "grad_norm": 0.06605208869396463, "learning_rate": 0.0001883502112919157, "loss": 0.8548, "step": 49230 }, { "epoch": 0.8644814691269158, "grad_norm": 0.04772102692795562, "learning_rate": 0.0001883449661000231, "loss": 0.8562, "step": 49240 }, { "epoch": 0.8646570340069173, "grad_norm": 0.060875442870127555, "learning_rate": 0.00018833971980145305, "loss": 0.8499, "step": 49250 }, { "epoch": 0.8648325988869187, "grad_norm": 0.10989091518484727, "learning_rate": 0.00018833447239627196, "loss": 0.8579, "step": 49260 }, { "epoch": 0.86500816376692, "grad_norm": 0.10878467812771798, "learning_rate": 0.00018832922388454645, "loss": 0.8545, "step": 49270 }, { "epoch": 0.8651837286469215, "grad_norm": 0.08285304686892267, "learning_rate": 0.00018832397426634285, "loss": 0.8608, "step": 49280 }, { "epoch": 0.8653592935269229, "grad_norm": 0.0544080436075462, "learning_rate": 0.00018831872354172782, "loss": 0.8512, "step": 49290 }, { "epoch": 0.8655348584069242, "grad_norm": 0.0735796272968038, "learning_rate": 0.00018831347171076782, "loss": 0.8551, "step": 49300 }, { "epoch": 0.8657104232869257, "grad_norm": 0.054824296040180485, "learning_rate": 0.0001883082187735294, "loss": 0.8542, "step": 49310 }, { "epoch": 0.8658859881669271, "grad_norm": 0.06255208373192539, "learning_rate": 0.00018830296473007908, "loss": 0.8592, "step": 49320 }, { "epoch": 0.8660615530469284, "grad_norm": 0.055137282736054054, "learning_rate": 0.00018829770958048349, "loss": 0.8528, "step": 49330 }, { "epoch": 0.8662371179269299, "grad_norm": 0.04610351608859931, "learning_rate": 0.00018829245332480915, "loss": 0.8451, "step": 49340 }, { "epoch": 0.8664126828069313, "grad_norm": 0.06055008103391821, "learning_rate": 0.00018828719596312277, "loss": 0.8421, "step": 49350 }, { "epoch": 0.8665882476869327, "grad_norm": 0.06462751261675172, "learning_rate": 0.00018828193749549082, "loss": 0.849, "step": 49360 }, { "epoch": 0.8667638125669341, "grad_norm": 0.05248056979369269, "learning_rate": 0.00018827667792198002, "loss": 0.8577, "step": 49370 }, { "epoch": 0.8669393774469355, "grad_norm": 0.1030962948795938, "learning_rate": 0.000188271417242657, "loss": 0.8541, "step": 49380 }, { "epoch": 0.8671149423269369, "grad_norm": 0.08054578339386094, "learning_rate": 0.0001882661554575884, "loss": 0.8589, "step": 49390 }, { "epoch": 0.8672905072069383, "grad_norm": 0.04183782322278747, "learning_rate": 0.00018826089256684085, "loss": 0.8534, "step": 49400 }, { "epoch": 0.8674660720869397, "grad_norm": 0.05853861649587684, "learning_rate": 0.0001882556285704811, "loss": 0.8579, "step": 49410 }, { "epoch": 0.8676416369669412, "grad_norm": 0.09210096345908551, "learning_rate": 0.00018825036346857582, "loss": 0.8469, "step": 49420 }, { "epoch": 0.8678172018469426, "grad_norm": 0.051598428433766434, "learning_rate": 0.0001882450972611917, "loss": 0.8556, "step": 49430 }, { "epoch": 0.8679927667269439, "grad_norm": 0.04558613423209515, "learning_rate": 0.00018823982994839548, "loss": 0.8552, "step": 49440 }, { "epoch": 0.8681683316069454, "grad_norm": 0.06059451115088109, "learning_rate": 0.00018823456153025387, "loss": 0.8444, "step": 49450 }, { "epoch": 0.8683438964869468, "grad_norm": 0.05398534645398694, "learning_rate": 0.0001882292920068337, "loss": 0.8544, "step": 49460 }, { "epoch": 0.8685194613669481, "grad_norm": 0.07005999080060873, "learning_rate": 0.00018822402137820164, "loss": 0.8557, "step": 49470 }, { "epoch": 0.8686950262469496, "grad_norm": 0.0692717821255926, "learning_rate": 0.00018821874964442453, "loss": 0.8545, "step": 49480 }, { "epoch": 0.868870591126951, "grad_norm": 0.08347241271354426, "learning_rate": 0.00018821347680556908, "loss": 0.8457, "step": 49490 }, { "epoch": 0.8690461560069523, "grad_norm": 0.06371701752484844, "learning_rate": 0.00018820820286170227, "loss": 0.8445, "step": 49500 }, { "epoch": 0.8692217208869538, "grad_norm": 0.14650549724108275, "learning_rate": 0.00018820292781289078, "loss": 0.8466, "step": 49510 }, { "epoch": 0.8693972857669552, "grad_norm": 0.057074211415284265, "learning_rate": 0.00018819765165920145, "loss": 0.8574, "step": 49520 }, { "epoch": 0.8695728506469566, "grad_norm": 0.07324111672422629, "learning_rate": 0.0001881923744007012, "loss": 0.8518, "step": 49530 }, { "epoch": 0.869748415526958, "grad_norm": 0.059184613491639296, "learning_rate": 0.00018818709603745683, "loss": 0.8576, "step": 49540 }, { "epoch": 0.8699239804069594, "grad_norm": 0.08598080862807804, "learning_rate": 0.00018818181656953523, "loss": 0.8505, "step": 49550 }, { "epoch": 0.8700995452869608, "grad_norm": 0.07715849359938544, "learning_rate": 0.00018817653599700337, "loss": 0.8519, "step": 49560 }, { "epoch": 0.8702751101669622, "grad_norm": 0.07413488146228027, "learning_rate": 0.00018817125431992804, "loss": 0.8565, "step": 49570 }, { "epoch": 0.8704506750469636, "grad_norm": 0.049328003297057436, "learning_rate": 0.00018816597153837624, "loss": 0.8463, "step": 49580 }, { "epoch": 0.870626239926965, "grad_norm": 0.07365703034633267, "learning_rate": 0.00018816068765241484, "loss": 0.8608, "step": 49590 }, { "epoch": 0.8708018048069665, "grad_norm": 0.07107788122916416, "learning_rate": 0.00018815540266211084, "loss": 0.852, "step": 49600 }, { "epoch": 0.8709773696869678, "grad_norm": 0.05561036358200915, "learning_rate": 0.0001881501165675312, "loss": 0.8519, "step": 49610 }, { "epoch": 0.8711529345669692, "grad_norm": 0.08174601995457047, "learning_rate": 0.00018814482936874288, "loss": 0.8487, "step": 49620 }, { "epoch": 0.8713284994469707, "grad_norm": 0.07060665284047575, "learning_rate": 0.00018813954106581285, "loss": 0.849, "step": 49630 }, { "epoch": 0.871504064326972, "grad_norm": 0.084614316957792, "learning_rate": 0.00018813425165880814, "loss": 0.8595, "step": 49640 }, { "epoch": 0.8716796292069734, "grad_norm": 0.05318389636104503, "learning_rate": 0.00018812896114779576, "loss": 0.8531, "step": 49650 }, { "epoch": 0.8718551940869749, "grad_norm": 0.05362971781934206, "learning_rate": 0.00018812366953284274, "loss": 0.8606, "step": 49660 }, { "epoch": 0.8720307589669762, "grad_norm": 0.06560190341048526, "learning_rate": 0.00018811837681401614, "loss": 0.8571, "step": 49670 }, { "epoch": 0.8722063238469776, "grad_norm": 0.06136277380120743, "learning_rate": 0.000188113082991383, "loss": 0.8499, "step": 49680 }, { "epoch": 0.8723818887269791, "grad_norm": 0.06328825139031169, "learning_rate": 0.0001881077880650104, "loss": 0.8492, "step": 49690 }, { "epoch": 0.8725574536069804, "grad_norm": 0.07188620823734722, "learning_rate": 0.00018810249203496545, "loss": 0.8503, "step": 49700 }, { "epoch": 0.8727330184869818, "grad_norm": 0.07767806861019007, "learning_rate": 0.00018809719490131525, "loss": 0.846, "step": 49710 }, { "epoch": 0.8729085833669833, "grad_norm": 0.0709882756334045, "learning_rate": 0.00018809189666412689, "loss": 0.8541, "step": 49720 }, { "epoch": 0.8730841482469847, "grad_norm": 0.05343518595704108, "learning_rate": 0.0001880865973234675, "loss": 0.8502, "step": 49730 }, { "epoch": 0.873259713126986, "grad_norm": 0.06184675795962728, "learning_rate": 0.0001880812968794042, "loss": 0.8576, "step": 49740 }, { "epoch": 0.8734352780069875, "grad_norm": 0.047430287909337204, "learning_rate": 0.00018807599533200419, "loss": 0.8613, "step": 49750 }, { "epoch": 0.8736108428869889, "grad_norm": 0.05475508217784738, "learning_rate": 0.00018807069268133466, "loss": 0.8512, "step": 49760 }, { "epoch": 0.8737864077669902, "grad_norm": 0.062775226102437, "learning_rate": 0.00018806538892746278, "loss": 0.8532, "step": 49770 }, { "epoch": 0.8739619726469917, "grad_norm": 0.05848647337023254, "learning_rate": 0.0001880600840704557, "loss": 0.8549, "step": 49780 }, { "epoch": 0.8741375375269931, "grad_norm": 0.09320631959790206, "learning_rate": 0.00018805477811038065, "loss": 0.8532, "step": 49790 }, { "epoch": 0.8743131024069946, "grad_norm": 0.07906292996812664, "learning_rate": 0.0001880494710473049, "loss": 0.8527, "step": 49800 }, { "epoch": 0.8744886672869959, "grad_norm": 0.059778882659791574, "learning_rate": 0.0001880441628812957, "loss": 0.8446, "step": 49810 }, { "epoch": 0.8746642321669973, "grad_norm": 0.0742125099729129, "learning_rate": 0.00018803885361242024, "loss": 0.8516, "step": 49820 }, { "epoch": 0.8748397970469988, "grad_norm": 0.05702249813898268, "learning_rate": 0.00018803354324074584, "loss": 0.8631, "step": 49830 }, { "epoch": 0.8750153619270001, "grad_norm": 0.07605071846905573, "learning_rate": 0.00018802823176633976, "loss": 0.8572, "step": 49840 }, { "epoch": 0.8751909268070015, "grad_norm": 0.0919338022832953, "learning_rate": 0.0001880229191892693, "loss": 0.8449, "step": 49850 }, { "epoch": 0.875366491687003, "grad_norm": 0.06261192103594504, "learning_rate": 0.00018801760550960178, "loss": 0.8497, "step": 49860 }, { "epoch": 0.8755420565670043, "grad_norm": 0.06571108046665391, "learning_rate": 0.00018801229072740451, "loss": 0.8559, "step": 49870 }, { "epoch": 0.8757176214470057, "grad_norm": 0.09122124643947176, "learning_rate": 0.00018800697484274486, "loss": 0.8493, "step": 49880 }, { "epoch": 0.8758931863270072, "grad_norm": 0.06766242905017707, "learning_rate": 0.00018800165785569017, "loss": 0.8638, "step": 49890 }, { "epoch": 0.8760687512070086, "grad_norm": 0.06343494440466516, "learning_rate": 0.00018799633976630776, "loss": 0.8512, "step": 49900 }, { "epoch": 0.8762443160870099, "grad_norm": 0.06071990326635008, "learning_rate": 0.00018799102057466508, "loss": 0.8475, "step": 49910 }, { "epoch": 0.8764198809670114, "grad_norm": 0.04962592179579309, "learning_rate": 0.00018798570028082952, "loss": 0.8505, "step": 49920 }, { "epoch": 0.8765954458470128, "grad_norm": 0.06787401383947338, "learning_rate": 0.00018798037888486844, "loss": 0.8485, "step": 49930 }, { "epoch": 0.8767710107270141, "grad_norm": 0.06459568030407734, "learning_rate": 0.00018797505638684927, "loss": 0.8595, "step": 49940 }, { "epoch": 0.8769465756070156, "grad_norm": 0.12618848710398378, "learning_rate": 0.00018796973278683952, "loss": 0.8517, "step": 49950 }, { "epoch": 0.877122140487017, "grad_norm": 0.07333752737059247, "learning_rate": 0.00018796440808490654, "loss": 0.855, "step": 49960 }, { "epoch": 0.8772977053670183, "grad_norm": 0.05114884299288853, "learning_rate": 0.00018795908228111787, "loss": 0.8485, "step": 49970 }, { "epoch": 0.8774732702470198, "grad_norm": 0.07377974190241317, "learning_rate": 0.00018795375537554093, "loss": 0.8559, "step": 49980 }, { "epoch": 0.8776488351270212, "grad_norm": 0.06300737898558817, "learning_rate": 0.0001879484273682433, "loss": 0.8484, "step": 49990 }, { "epoch": 0.8778244000070226, "grad_norm": 0.07291594808784989, "learning_rate": 0.0001879430982592924, "loss": 0.8571, "step": 50000 }, { "epoch": 0.877999964887024, "grad_norm": 0.05935889173319087, "learning_rate": 0.00018793776804875578, "loss": 0.8572, "step": 50010 }, { "epoch": 0.8781755297670254, "grad_norm": 0.05890201175848357, "learning_rate": 0.00018793243673670098, "loss": 0.8484, "step": 50020 }, { "epoch": 0.8783510946470268, "grad_norm": 0.09039413968526228, "learning_rate": 0.00018792710432319557, "loss": 0.8436, "step": 50030 }, { "epoch": 0.8785266595270282, "grad_norm": 0.060327694558921806, "learning_rate": 0.000187921770808307, "loss": 0.8511, "step": 50040 }, { "epoch": 0.8787022244070296, "grad_norm": 0.055699877980463885, "learning_rate": 0.00018791643619210302, "loss": 0.852, "step": 50050 }, { "epoch": 0.878877789287031, "grad_norm": 0.061669739866279036, "learning_rate": 0.00018791110047465114, "loss": 0.8456, "step": 50060 }, { "epoch": 0.8790533541670325, "grad_norm": 0.07473816716979069, "learning_rate": 0.00018790576365601894, "loss": 0.8538, "step": 50070 }, { "epoch": 0.8792289190470338, "grad_norm": 0.06806125926994747, "learning_rate": 0.00018790042573627405, "loss": 0.8474, "step": 50080 }, { "epoch": 0.8794044839270352, "grad_norm": 0.07498377086572416, "learning_rate": 0.0001878950867154841, "loss": 0.8513, "step": 50090 }, { "epoch": 0.8795800488070367, "grad_norm": 0.07729645614303624, "learning_rate": 0.00018788974659371676, "loss": 0.8494, "step": 50100 }, { "epoch": 0.879755613687038, "grad_norm": 0.05279082736160683, "learning_rate": 0.0001878844053710397, "loss": 0.8591, "step": 50110 }, { "epoch": 0.8799311785670394, "grad_norm": 0.07782662190638033, "learning_rate": 0.00018787906304752055, "loss": 0.848, "step": 50120 }, { "epoch": 0.8801067434470409, "grad_norm": 0.05097926675055242, "learning_rate": 0.00018787371962322702, "loss": 0.8507, "step": 50130 }, { "epoch": 0.8802823083270422, "grad_norm": 0.06309680642501553, "learning_rate": 0.00018786837509822684, "loss": 0.8568, "step": 50140 }, { "epoch": 0.8804578732070437, "grad_norm": 0.059906092486743764, "learning_rate": 0.00018786302947258767, "loss": 0.8513, "step": 50150 }, { "epoch": 0.8806334380870451, "grad_norm": 0.08045060186324872, "learning_rate": 0.00018785768274637724, "loss": 0.8539, "step": 50160 }, { "epoch": 0.8808090029670465, "grad_norm": 0.05841389111604709, "learning_rate": 0.0001878523349196634, "loss": 0.8527, "step": 50170 }, { "epoch": 0.8809845678470479, "grad_norm": 0.062025225657058695, "learning_rate": 0.0001878469859925138, "loss": 0.8508, "step": 50180 }, { "epoch": 0.8811601327270493, "grad_norm": 0.0563731986835495, "learning_rate": 0.00018784163596499625, "loss": 0.8486, "step": 50190 }, { "epoch": 0.8813356976070507, "grad_norm": 0.06720194979955521, "learning_rate": 0.0001878362848371785, "loss": 0.8495, "step": 50200 }, { "epoch": 0.8815112624870521, "grad_norm": 0.06572342232097442, "learning_rate": 0.00018783093260912837, "loss": 0.8455, "step": 50210 }, { "epoch": 0.8816868273670535, "grad_norm": 0.058304307072872104, "learning_rate": 0.0001878255792809137, "loss": 0.859, "step": 50220 }, { "epoch": 0.8818623922470549, "grad_norm": 0.05079681376879491, "learning_rate": 0.00018782022485260233, "loss": 0.863, "step": 50230 }, { "epoch": 0.8820379571270563, "grad_norm": 0.048737427759766604, "learning_rate": 0.00018781486932426206, "loss": 0.8578, "step": 50240 }, { "epoch": 0.8822135220070577, "grad_norm": 0.06922719626290813, "learning_rate": 0.0001878095126959607, "loss": 0.8516, "step": 50250 }, { "epoch": 0.8823890868870591, "grad_norm": 0.0658364786770367, "learning_rate": 0.00018780415496776626, "loss": 0.8513, "step": 50260 }, { "epoch": 0.8825646517670606, "grad_norm": 0.08600908390717305, "learning_rate": 0.0001877987961397465, "loss": 0.8562, "step": 50270 }, { "epoch": 0.8827402166470619, "grad_norm": 0.048597086375931145, "learning_rate": 0.00018779343621196936, "loss": 0.8582, "step": 50280 }, { "epoch": 0.8829157815270633, "grad_norm": 0.06655191472683604, "learning_rate": 0.00018778807518450276, "loss": 0.8511, "step": 50290 }, { "epoch": 0.8830913464070648, "grad_norm": 0.06844144405246301, "learning_rate": 0.00018778271305741463, "loss": 0.8572, "step": 50300 }, { "epoch": 0.8832669112870661, "grad_norm": 0.060747148464578835, "learning_rate": 0.00018777734983077283, "loss": 0.8544, "step": 50310 }, { "epoch": 0.8834424761670675, "grad_norm": 0.05463920115207388, "learning_rate": 0.00018777198550464543, "loss": 0.8422, "step": 50320 }, { "epoch": 0.883618041047069, "grad_norm": 0.07672073816462882, "learning_rate": 0.00018776662007910036, "loss": 0.8437, "step": 50330 }, { "epoch": 0.8837936059270703, "grad_norm": 0.08209000338779424, "learning_rate": 0.00018776125355420554, "loss": 0.8551, "step": 50340 }, { "epoch": 0.8839691708070717, "grad_norm": 0.0629576260133559, "learning_rate": 0.00018775588593002905, "loss": 0.8382, "step": 50350 }, { "epoch": 0.8841447356870732, "grad_norm": 0.07900218299967032, "learning_rate": 0.0001877505172066388, "loss": 0.8533, "step": 50360 }, { "epoch": 0.8843203005670746, "grad_norm": 0.06344414424730535, "learning_rate": 0.00018774514738410293, "loss": 0.8556, "step": 50370 }, { "epoch": 0.8844958654470759, "grad_norm": 0.06498147851017197, "learning_rate": 0.00018773977646248938, "loss": 0.8463, "step": 50380 }, { "epoch": 0.8846714303270774, "grad_norm": 0.056786249767475636, "learning_rate": 0.00018773440444186626, "loss": 0.8466, "step": 50390 }, { "epoch": 0.8848469952070788, "grad_norm": 0.0719349882029267, "learning_rate": 0.0001877290313223016, "loss": 0.8489, "step": 50400 }, { "epoch": 0.8850225600870801, "grad_norm": 0.08767560213562822, "learning_rate": 0.0001877236571038635, "loss": 0.8468, "step": 50410 }, { "epoch": 0.8851981249670816, "grad_norm": 0.07095625761398223, "learning_rate": 0.00018771828178662002, "loss": 0.8547, "step": 50420 }, { "epoch": 0.885373689847083, "grad_norm": 0.07453826509152775, "learning_rate": 0.00018771290537063933, "loss": 0.8574, "step": 50430 }, { "epoch": 0.8855492547270843, "grad_norm": 0.08598926144800784, "learning_rate": 0.0001877075278559895, "loss": 0.852, "step": 50440 }, { "epoch": 0.8857248196070858, "grad_norm": 0.05124587250438287, "learning_rate": 0.00018770214924273865, "loss": 0.8578, "step": 50450 }, { "epoch": 0.8859003844870872, "grad_norm": 0.058731166838281376, "learning_rate": 0.000187696769530955, "loss": 0.8614, "step": 50460 }, { "epoch": 0.8860759493670886, "grad_norm": 0.0588161563323831, "learning_rate": 0.00018769138872070664, "loss": 0.8582, "step": 50470 }, { "epoch": 0.88625151424709, "grad_norm": 0.061077447572731924, "learning_rate": 0.00018768600681206174, "loss": 0.8556, "step": 50480 }, { "epoch": 0.8864270791270914, "grad_norm": 0.08074596310023444, "learning_rate": 0.00018768062380508855, "loss": 0.8466, "step": 50490 }, { "epoch": 0.8866026440070928, "grad_norm": 0.05960405245509958, "learning_rate": 0.00018767523969985523, "loss": 0.8503, "step": 50500 }, { "epoch": 0.8867782088870942, "grad_norm": 0.05011262738212449, "learning_rate": 0.00018766985449643003, "loss": 0.8386, "step": 50510 }, { "epoch": 0.8869537737670956, "grad_norm": 0.07195515317225881, "learning_rate": 0.00018766446819488115, "loss": 0.8534, "step": 50520 }, { "epoch": 0.8871293386470971, "grad_norm": 0.057774067965602846, "learning_rate": 0.00018765908079527687, "loss": 0.8515, "step": 50530 }, { "epoch": 0.8873049035270985, "grad_norm": 0.055419256095997925, "learning_rate": 0.0001876536922976854, "loss": 0.8625, "step": 50540 }, { "epoch": 0.8874804684070998, "grad_norm": 0.05743085419748505, "learning_rate": 0.00018764830270217507, "loss": 0.8538, "step": 50550 }, { "epoch": 0.8876560332871013, "grad_norm": 0.0657729277511328, "learning_rate": 0.0001876429120088141, "loss": 0.8535, "step": 50560 }, { "epoch": 0.8878315981671027, "grad_norm": 0.0684849413667164, "learning_rate": 0.00018763752021767088, "loss": 0.8548, "step": 50570 }, { "epoch": 0.888007163047104, "grad_norm": 0.09752218769112139, "learning_rate": 0.0001876321273288136, "loss": 0.858, "step": 50580 }, { "epoch": 0.8881827279271055, "grad_norm": 0.055367785973738114, "learning_rate": 0.00018762673334231074, "loss": 0.8421, "step": 50590 }, { "epoch": 0.8883582928071069, "grad_norm": 0.06489605138795157, "learning_rate": 0.00018762133825823055, "loss": 0.8417, "step": 50600 }, { "epoch": 0.8885338576871082, "grad_norm": 0.059301369840885396, "learning_rate": 0.00018761594207664139, "loss": 0.8555, "step": 50610 }, { "epoch": 0.8887094225671097, "grad_norm": 0.08343677483142059, "learning_rate": 0.00018761054479761162, "loss": 0.8569, "step": 50620 }, { "epoch": 0.8888849874471111, "grad_norm": 0.07230571919006619, "learning_rate": 0.0001876051464212097, "loss": 0.8509, "step": 50630 }, { "epoch": 0.8890605523271125, "grad_norm": 0.06441246691466639, "learning_rate": 0.00018759974694750392, "loss": 0.8575, "step": 50640 }, { "epoch": 0.8892361172071139, "grad_norm": 0.07488378489975694, "learning_rate": 0.0001875943463765628, "loss": 0.8529, "step": 50650 }, { "epoch": 0.8894116820871153, "grad_norm": 0.09454160678605143, "learning_rate": 0.0001875889447084547, "loss": 0.8439, "step": 50660 }, { "epoch": 0.8895872469671167, "grad_norm": 0.055148941893185525, "learning_rate": 0.00018758354194324805, "loss": 0.8559, "step": 50670 }, { "epoch": 0.8897628118471181, "grad_norm": 0.07033487732007443, "learning_rate": 0.00018757813808101137, "loss": 0.8451, "step": 50680 }, { "epoch": 0.8899383767271195, "grad_norm": 0.054140078983721036, "learning_rate": 0.00018757273312181307, "loss": 0.8492, "step": 50690 }, { "epoch": 0.8901139416071209, "grad_norm": 0.07438392329279504, "learning_rate": 0.00018756732706572165, "loss": 0.8492, "step": 50700 }, { "epoch": 0.8902895064871224, "grad_norm": 0.06166652158062049, "learning_rate": 0.00018756191991280561, "loss": 0.8491, "step": 50710 }, { "epoch": 0.8904650713671237, "grad_norm": 0.06462639490047087, "learning_rate": 0.0001875565116631334, "loss": 0.842, "step": 50720 }, { "epoch": 0.8906406362471251, "grad_norm": 0.04833005144601604, "learning_rate": 0.00018755110231677367, "loss": 0.851, "step": 50730 }, { "epoch": 0.8908162011271266, "grad_norm": 0.05006604313706428, "learning_rate": 0.00018754569187379486, "loss": 0.85, "step": 50740 }, { "epoch": 0.8909917660071279, "grad_norm": 0.11286663825746192, "learning_rate": 0.00018754028033426558, "loss": 0.8491, "step": 50750 }, { "epoch": 0.8911673308871293, "grad_norm": 0.07399246488788726, "learning_rate": 0.0001875348676982543, "loss": 0.8483, "step": 50760 }, { "epoch": 0.8913428957671308, "grad_norm": 0.05764747832325426, "learning_rate": 0.00018752945396582972, "loss": 0.8516, "step": 50770 }, { "epoch": 0.8915184606471321, "grad_norm": 0.05413863695093473, "learning_rate": 0.00018752403913706035, "loss": 0.8521, "step": 50780 }, { "epoch": 0.8916940255271335, "grad_norm": 0.061223666129464796, "learning_rate": 0.0001875186232120148, "loss": 0.8492, "step": 50790 }, { "epoch": 0.891869590407135, "grad_norm": 0.07872445302798205, "learning_rate": 0.00018751320619076175, "loss": 0.851, "step": 50800 }, { "epoch": 0.8920451552871363, "grad_norm": 0.07440618035702816, "learning_rate": 0.00018750778807336978, "loss": 0.8539, "step": 50810 }, { "epoch": 0.8922207201671377, "grad_norm": 0.07819704553344695, "learning_rate": 0.00018750236885990758, "loss": 0.854, "step": 50820 }, { "epoch": 0.8923962850471392, "grad_norm": 0.05564502300218891, "learning_rate": 0.00018749694855044378, "loss": 0.8539, "step": 50830 }, { "epoch": 0.8925718499271406, "grad_norm": 0.07229397836362848, "learning_rate": 0.00018749152714504704, "loss": 0.8507, "step": 50840 }, { "epoch": 0.8927474148071419, "grad_norm": 0.06618759003450547, "learning_rate": 0.0001874861046437861, "loss": 0.853, "step": 50850 }, { "epoch": 0.8929229796871434, "grad_norm": 0.0662224243557811, "learning_rate": 0.0001874806810467296, "loss": 0.8496, "step": 50860 }, { "epoch": 0.8930985445671448, "grad_norm": 0.079592667915976, "learning_rate": 0.00018747525635394633, "loss": 0.8562, "step": 50870 }, { "epoch": 0.8932741094471462, "grad_norm": 0.0537661633281545, "learning_rate": 0.00018746983056550497, "loss": 0.8455, "step": 50880 }, { "epoch": 0.8934496743271476, "grad_norm": 0.07627089021108606, "learning_rate": 0.00018746440368147432, "loss": 0.8502, "step": 50890 }, { "epoch": 0.893625239207149, "grad_norm": 0.07235968532612647, "learning_rate": 0.00018745897570192306, "loss": 0.8555, "step": 50900 }, { "epoch": 0.8938008040871505, "grad_norm": 0.0635589775961434, "learning_rate": 0.00018745354662692005, "loss": 0.8473, "step": 50910 }, { "epoch": 0.8939763689671518, "grad_norm": 0.07101162408551163, "learning_rate": 0.000187448116456534, "loss": 0.8548, "step": 50920 }, { "epoch": 0.8941519338471532, "grad_norm": 0.07490168341255175, "learning_rate": 0.0001874426851908338, "loss": 0.8608, "step": 50930 }, { "epoch": 0.8943274987271547, "grad_norm": 0.060437415146500244, "learning_rate": 0.00018743725282988814, "loss": 0.8542, "step": 50940 }, { "epoch": 0.894503063607156, "grad_norm": 0.060413964959036344, "learning_rate": 0.00018743181937376597, "loss": 0.8517, "step": 50950 }, { "epoch": 0.8946786284871574, "grad_norm": 0.05364845008372144, "learning_rate": 0.00018742638482253606, "loss": 0.8424, "step": 50960 }, { "epoch": 0.8948541933671589, "grad_norm": 0.05161128907783689, "learning_rate": 0.00018742094917626728, "loss": 0.8536, "step": 50970 }, { "epoch": 0.8950297582471602, "grad_norm": 0.06004069975335875, "learning_rate": 0.00018741551243502857, "loss": 0.8489, "step": 50980 }, { "epoch": 0.8952053231271616, "grad_norm": 0.06174598660753231, "learning_rate": 0.0001874100745988887, "loss": 0.8528, "step": 50990 }, { "epoch": 0.8953808880071631, "grad_norm": 0.06817657238512229, "learning_rate": 0.00018740463566791667, "loss": 0.8482, "step": 51000 }, { "epoch": 0.8955564528871645, "grad_norm": 0.07788965075578769, "learning_rate": 0.0001873991956421813, "loss": 0.8592, "step": 51010 }, { "epoch": 0.8957320177671658, "grad_norm": 0.06630833845797687, "learning_rate": 0.00018739375452175158, "loss": 0.8533, "step": 51020 }, { "epoch": 0.8959075826471673, "grad_norm": 0.0894381070079055, "learning_rate": 0.00018738831230669643, "loss": 0.8605, "step": 51030 }, { "epoch": 0.8960831475271687, "grad_norm": 0.06316342236467318, "learning_rate": 0.00018738286899708484, "loss": 0.854, "step": 51040 }, { "epoch": 0.89625871240717, "grad_norm": 0.06632657528081777, "learning_rate": 0.00018737742459298568, "loss": 0.8563, "step": 51050 }, { "epoch": 0.8964342772871715, "grad_norm": 0.08668069840339993, "learning_rate": 0.00018737197909446805, "loss": 0.8599, "step": 51060 }, { "epoch": 0.8966098421671729, "grad_norm": 0.0752222136258506, "learning_rate": 0.00018736653250160085, "loss": 0.8524, "step": 51070 }, { "epoch": 0.8967854070471742, "grad_norm": 0.0715680055744508, "learning_rate": 0.0001873610848144532, "loss": 0.8535, "step": 51080 }, { "epoch": 0.8969609719271757, "grad_norm": 0.08335870179528601, "learning_rate": 0.00018735563603309397, "loss": 0.8565, "step": 51090 }, { "epoch": 0.8971365368071771, "grad_norm": 0.04910302970986397, "learning_rate": 0.00018735018615759233, "loss": 0.8543, "step": 51100 }, { "epoch": 0.8973121016871785, "grad_norm": 0.05952412430306795, "learning_rate": 0.0001873447351880173, "loss": 0.8531, "step": 51110 }, { "epoch": 0.8974876665671799, "grad_norm": 0.07575401254822917, "learning_rate": 0.00018733928312443787, "loss": 0.8511, "step": 51120 }, { "epoch": 0.8976632314471813, "grad_norm": 0.05587606010562313, "learning_rate": 0.00018733382996692324, "loss": 0.8447, "step": 51130 }, { "epoch": 0.8978387963271827, "grad_norm": 0.07973054913717782, "learning_rate": 0.00018732837571554237, "loss": 0.8558, "step": 51140 }, { "epoch": 0.8980143612071841, "grad_norm": 0.05054581546350068, "learning_rate": 0.0001873229203703645, "loss": 0.8575, "step": 51150 }, { "epoch": 0.8981899260871855, "grad_norm": 0.05747105530930816, "learning_rate": 0.00018731746393145865, "loss": 0.8554, "step": 51160 }, { "epoch": 0.8983654909671869, "grad_norm": 0.09504366680475498, "learning_rate": 0.000187312006398894, "loss": 0.8587, "step": 51170 }, { "epoch": 0.8985410558471884, "grad_norm": 0.04989276656763578, "learning_rate": 0.00018730654777273967, "loss": 0.857, "step": 51180 }, { "epoch": 0.8987166207271897, "grad_norm": 0.09837991227506145, "learning_rate": 0.00018730108805306483, "loss": 0.8524, "step": 51190 }, { "epoch": 0.8988921856071911, "grad_norm": 0.06676704434023024, "learning_rate": 0.0001872956272399387, "loss": 0.8508, "step": 51200 }, { "epoch": 0.8990677504871926, "grad_norm": 0.0479009075196238, "learning_rate": 0.00018729016533343044, "loss": 0.8557, "step": 51210 }, { "epoch": 0.8992433153671939, "grad_norm": 0.05177292152460493, "learning_rate": 0.00018728470233360923, "loss": 0.8519, "step": 51220 }, { "epoch": 0.8994188802471953, "grad_norm": 0.058260281732208236, "learning_rate": 0.00018727923824054428, "loss": 0.8526, "step": 51230 }, { "epoch": 0.8995944451271968, "grad_norm": 0.07283496311594259, "learning_rate": 0.00018727377305430488, "loss": 0.8533, "step": 51240 }, { "epoch": 0.8997700100071981, "grad_norm": 0.05769298961233435, "learning_rate": 0.00018726830677496024, "loss": 0.8583, "step": 51250 }, { "epoch": 0.8999455748871996, "grad_norm": 0.08543911422606003, "learning_rate": 0.00018726283940257961, "loss": 0.8479, "step": 51260 }, { "epoch": 0.900121139767201, "grad_norm": 0.05773565404703946, "learning_rate": 0.00018725737093723226, "loss": 0.8422, "step": 51270 }, { "epoch": 0.9002967046472024, "grad_norm": 0.061184935483674284, "learning_rate": 0.00018725190137898752, "loss": 0.8532, "step": 51280 }, { "epoch": 0.9004722695272038, "grad_norm": 0.08730325528106753, "learning_rate": 0.00018724643072791464, "loss": 0.8472, "step": 51290 }, { "epoch": 0.9006478344072052, "grad_norm": 0.05057529637453755, "learning_rate": 0.00018724095898408296, "loss": 0.8419, "step": 51300 }, { "epoch": 0.9008233992872066, "grad_norm": 0.0883348252168898, "learning_rate": 0.00018723548614756177, "loss": 0.8577, "step": 51310 }, { "epoch": 0.900998964167208, "grad_norm": 0.0951062085074144, "learning_rate": 0.00018723001221842048, "loss": 0.8584, "step": 51320 }, { "epoch": 0.9011745290472094, "grad_norm": 0.06836967423866752, "learning_rate": 0.0001872245371967284, "loss": 0.8558, "step": 51330 }, { "epoch": 0.9013500939272108, "grad_norm": 0.07200103460269672, "learning_rate": 0.0001872190610825549, "loss": 0.853, "step": 51340 }, { "epoch": 0.9015256588072122, "grad_norm": 0.081017221593092, "learning_rate": 0.0001872135838759694, "loss": 0.8524, "step": 51350 }, { "epoch": 0.9017012236872136, "grad_norm": 0.06077713550819009, "learning_rate": 0.00018720810557704124, "loss": 0.8509, "step": 51360 }, { "epoch": 0.901876788567215, "grad_norm": 0.07977634291198864, "learning_rate": 0.0001872026261858399, "loss": 0.8491, "step": 51370 }, { "epoch": 0.9020523534472165, "grad_norm": 0.0805959761413738, "learning_rate": 0.0001871971457024347, "loss": 0.8527, "step": 51380 }, { "epoch": 0.9022279183272178, "grad_norm": 0.058383068052664466, "learning_rate": 0.0001871916641268952, "loss": 0.8525, "step": 51390 }, { "epoch": 0.9024034832072192, "grad_norm": 0.06935404418889989, "learning_rate": 0.00018718618145929076, "loss": 0.8599, "step": 51400 }, { "epoch": 0.9025790480872207, "grad_norm": 0.07021228161858148, "learning_rate": 0.00018718069769969092, "loss": 0.8501, "step": 51410 }, { "epoch": 0.902754612967222, "grad_norm": 0.054957455753611203, "learning_rate": 0.00018717521284816507, "loss": 0.8427, "step": 51420 }, { "epoch": 0.9029301778472234, "grad_norm": 0.05722356238779132, "learning_rate": 0.00018716972690478284, "loss": 0.852, "step": 51430 }, { "epoch": 0.9031057427272249, "grad_norm": 0.057422061275585086, "learning_rate": 0.0001871642398696136, "loss": 0.8484, "step": 51440 }, { "epoch": 0.9032813076072262, "grad_norm": 0.071405557652343, "learning_rate": 0.00018715875174272692, "loss": 0.8492, "step": 51450 }, { "epoch": 0.9034568724872276, "grad_norm": 0.06098827878157956, "learning_rate": 0.00018715326252419237, "loss": 0.8539, "step": 51460 }, { "epoch": 0.9036324373672291, "grad_norm": 0.07434703139923607, "learning_rate": 0.00018714777221407944, "loss": 0.849, "step": 51470 }, { "epoch": 0.9038080022472305, "grad_norm": 0.05501481372601671, "learning_rate": 0.00018714228081245776, "loss": 0.852, "step": 51480 }, { "epoch": 0.9039835671272318, "grad_norm": 0.06745530243237322, "learning_rate": 0.00018713678831939687, "loss": 0.8528, "step": 51490 }, { "epoch": 0.9041591320072333, "grad_norm": 0.0917293059279573, "learning_rate": 0.00018713129473496635, "loss": 0.8521, "step": 51500 }, { "epoch": 0.9043346968872347, "grad_norm": 0.06385416650910296, "learning_rate": 0.0001871258000592358, "loss": 0.8591, "step": 51510 }, { "epoch": 0.904510261767236, "grad_norm": 0.0937803135721276, "learning_rate": 0.0001871203042922749, "loss": 0.8526, "step": 51520 }, { "epoch": 0.9046858266472375, "grad_norm": 0.07135346202535937, "learning_rate": 0.00018711480743415323, "loss": 0.8478, "step": 51530 }, { "epoch": 0.9048613915272389, "grad_norm": 0.06604332855784446, "learning_rate": 0.00018710930948494044, "loss": 0.856, "step": 51540 }, { "epoch": 0.9050369564072402, "grad_norm": 0.055563031904983684, "learning_rate": 0.00018710381044470618, "loss": 0.8511, "step": 51550 }, { "epoch": 0.9052125212872417, "grad_norm": 0.04691697813180817, "learning_rate": 0.00018709831031352018, "loss": 0.8537, "step": 51560 }, { "epoch": 0.9053880861672431, "grad_norm": 0.06974836996418313, "learning_rate": 0.00018709280909145204, "loss": 0.849, "step": 51570 }, { "epoch": 0.9055636510472445, "grad_norm": 0.05162057428800077, "learning_rate": 0.00018708730677857155, "loss": 0.8518, "step": 51580 }, { "epoch": 0.9057392159272459, "grad_norm": 0.06168068695560914, "learning_rate": 0.00018708180337494835, "loss": 0.8523, "step": 51590 }, { "epoch": 0.9059147808072473, "grad_norm": 0.06938583524653129, "learning_rate": 0.0001870762988806522, "loss": 0.8486, "step": 51600 }, { "epoch": 0.9060903456872488, "grad_norm": 0.0638898046567099, "learning_rate": 0.00018707079329575288, "loss": 0.8539, "step": 51610 }, { "epoch": 0.9062659105672501, "grad_norm": 0.07018915828672245, "learning_rate": 0.0001870652866203201, "loss": 0.8534, "step": 51620 }, { "epoch": 0.9064414754472515, "grad_norm": 0.046090136216283877, "learning_rate": 0.00018705977885442364, "loss": 0.857, "step": 51630 }, { "epoch": 0.906617040327253, "grad_norm": 0.07037915251362557, "learning_rate": 0.00018705426999813328, "loss": 0.8453, "step": 51640 }, { "epoch": 0.9067926052072544, "grad_norm": 0.07535866724888861, "learning_rate": 0.00018704876005151884, "loss": 0.8577, "step": 51650 }, { "epoch": 0.9069681700872557, "grad_norm": 0.07264116039965367, "learning_rate": 0.00018704324901465012, "loss": 0.8444, "step": 51660 }, { "epoch": 0.9071437349672572, "grad_norm": 0.07257380989003108, "learning_rate": 0.00018703773688759695, "loss": 0.8482, "step": 51670 }, { "epoch": 0.9073192998472586, "grad_norm": 0.06273954295336075, "learning_rate": 0.00018703222367042914, "loss": 0.8493, "step": 51680 }, { "epoch": 0.9074948647272599, "grad_norm": 0.06594074151088294, "learning_rate": 0.00018702670936321657, "loss": 0.8453, "step": 51690 }, { "epoch": 0.9076704296072614, "grad_norm": 0.054860343960016524, "learning_rate": 0.00018702119396602914, "loss": 0.8592, "step": 51700 }, { "epoch": 0.9078459944872628, "grad_norm": 0.07839057289268589, "learning_rate": 0.00018701567747893668, "loss": 0.8503, "step": 51710 }, { "epoch": 0.9080215593672641, "grad_norm": 0.11486753116904445, "learning_rate": 0.0001870101599020091, "loss": 0.8588, "step": 51720 }, { "epoch": 0.9081971242472656, "grad_norm": 0.05417022434882022, "learning_rate": 0.00018700464123531628, "loss": 0.8496, "step": 51730 }, { "epoch": 0.908372689127267, "grad_norm": 0.0743336384380771, "learning_rate": 0.0001869991214789282, "loss": 0.8463, "step": 51740 }, { "epoch": 0.9085482540072684, "grad_norm": 0.07421877683135258, "learning_rate": 0.00018699360063291481, "loss": 0.85, "step": 51750 }, { "epoch": 0.9087238188872698, "grad_norm": 0.0566229441644477, "learning_rate": 0.00018698807869734597, "loss": 0.8523, "step": 51760 }, { "epoch": 0.9088993837672712, "grad_norm": 0.06055307498137061, "learning_rate": 0.00018698255567229172, "loss": 0.8603, "step": 51770 }, { "epoch": 0.9090749486472726, "grad_norm": 0.05978067195809929, "learning_rate": 0.00018697703155782198, "loss": 0.8563, "step": 51780 }, { "epoch": 0.909250513527274, "grad_norm": 0.08855716942748568, "learning_rate": 0.0001869715063540068, "loss": 0.8521, "step": 51790 }, { "epoch": 0.9094260784072754, "grad_norm": 0.053334483267376886, "learning_rate": 0.0001869659800609161, "loss": 0.8526, "step": 51800 }, { "epoch": 0.9096016432872768, "grad_norm": 0.05719583992685535, "learning_rate": 0.00018696045267862002, "loss": 0.854, "step": 51810 }, { "epoch": 0.9097772081672783, "grad_norm": 0.058139479633446775, "learning_rate": 0.0001869549242071885, "loss": 0.8581, "step": 51820 }, { "epoch": 0.9099527730472796, "grad_norm": 0.09909422402032068, "learning_rate": 0.00018694939464669166, "loss": 0.852, "step": 51830 }, { "epoch": 0.910128337927281, "grad_norm": 0.08315666330915147, "learning_rate": 0.00018694386399719948, "loss": 0.8543, "step": 51840 }, { "epoch": 0.9103039028072825, "grad_norm": 0.07227655058015592, "learning_rate": 0.00018693833225878208, "loss": 0.8561, "step": 51850 }, { "epoch": 0.9104794676872838, "grad_norm": 0.07471051064194667, "learning_rate": 0.00018693279943150955, "loss": 0.8552, "step": 51860 }, { "epoch": 0.9106550325672852, "grad_norm": 0.06757823760933587, "learning_rate": 0.00018692726551545197, "loss": 0.8493, "step": 51870 }, { "epoch": 0.9108305974472867, "grad_norm": 0.07208701991578528, "learning_rate": 0.00018692173051067945, "loss": 0.8533, "step": 51880 }, { "epoch": 0.911006162327288, "grad_norm": 0.07689853664374548, "learning_rate": 0.00018691619441726218, "loss": 0.8501, "step": 51890 }, { "epoch": 0.9111817272072894, "grad_norm": 0.05229901664777416, "learning_rate": 0.0001869106572352702, "loss": 0.8502, "step": 51900 }, { "epoch": 0.9113572920872909, "grad_norm": 0.08048460942485705, "learning_rate": 0.0001869051189647738, "loss": 0.8595, "step": 51910 }, { "epoch": 0.9115328569672922, "grad_norm": 0.06252718945206524, "learning_rate": 0.00018689957960584303, "loss": 0.8522, "step": 51920 }, { "epoch": 0.9117084218472936, "grad_norm": 0.06420993926211635, "learning_rate": 0.00018689403915854815, "loss": 0.8556, "step": 51930 }, { "epoch": 0.9118839867272951, "grad_norm": 0.05972521515844701, "learning_rate": 0.00018688849762295932, "loss": 0.8507, "step": 51940 }, { "epoch": 0.9120595516072965, "grad_norm": 0.08916823560454058, "learning_rate": 0.00018688295499914677, "loss": 0.8533, "step": 51950 }, { "epoch": 0.9122351164872978, "grad_norm": 0.047221112158325876, "learning_rate": 0.0001868774112871807, "loss": 0.8509, "step": 51960 }, { "epoch": 0.9124106813672993, "grad_norm": 0.05885773791636452, "learning_rate": 0.0001868718664871314, "loss": 0.8502, "step": 51970 }, { "epoch": 0.9125862462473007, "grad_norm": 0.07393128542937837, "learning_rate": 0.0001868663205990691, "loss": 0.8506, "step": 51980 }, { "epoch": 0.9127618111273021, "grad_norm": 0.053474003204453106, "learning_rate": 0.00018686077362306403, "loss": 0.857, "step": 51990 }, { "epoch": 0.9129373760073035, "grad_norm": 0.07016446383066285, "learning_rate": 0.00018685522555918652, "loss": 0.8498, "step": 52000 }, { "epoch": 0.9131129408873049, "grad_norm": 0.06450010188599384, "learning_rate": 0.00018684967640750685, "loss": 0.8565, "step": 52010 }, { "epoch": 0.9132885057673064, "grad_norm": 0.059383057978195035, "learning_rate": 0.00018684412616809531, "loss": 0.8545, "step": 52020 }, { "epoch": 0.9134640706473077, "grad_norm": 0.06055350872877517, "learning_rate": 0.00018683857484102224, "loss": 0.8515, "step": 52030 }, { "epoch": 0.9136396355273091, "grad_norm": 0.07731295172914557, "learning_rate": 0.000186833022426358, "loss": 0.8608, "step": 52040 }, { "epoch": 0.9138152004073106, "grad_norm": 0.08358581128355994, "learning_rate": 0.0001868274689241729, "loss": 0.8487, "step": 52050 }, { "epoch": 0.9139907652873119, "grad_norm": 0.07780389240433859, "learning_rate": 0.00018682191433453733, "loss": 0.8532, "step": 52060 }, { "epoch": 0.9141663301673133, "grad_norm": 0.06371081858937198, "learning_rate": 0.00018681635865752167, "loss": 0.8527, "step": 52070 }, { "epoch": 0.9143418950473148, "grad_norm": 0.0649971497353415, "learning_rate": 0.00018681080189319625, "loss": 0.8564, "step": 52080 }, { "epoch": 0.9145174599273161, "grad_norm": 0.06240742809624101, "learning_rate": 0.0001868052440416316, "loss": 0.8484, "step": 52090 }, { "epoch": 0.9146930248073175, "grad_norm": 0.06926152662338432, "learning_rate": 0.00018679968510289797, "loss": 0.8468, "step": 52100 }, { "epoch": 0.914868589687319, "grad_norm": 0.07162782561706299, "learning_rate": 0.00018679412507706597, "loss": 0.8466, "step": 52110 }, { "epoch": 0.9150441545673204, "grad_norm": 0.06552992019817752, "learning_rate": 0.0001867885639642059, "loss": 0.8559, "step": 52120 }, { "epoch": 0.9152197194473217, "grad_norm": 0.06049683167712435, "learning_rate": 0.00018678300176438832, "loss": 0.8607, "step": 52130 }, { "epoch": 0.9153952843273232, "grad_norm": 0.05973556817573839, "learning_rate": 0.00018677743847768366, "loss": 0.8547, "step": 52140 }, { "epoch": 0.9155708492073246, "grad_norm": 0.06023576213489868, "learning_rate": 0.00018677187410416244, "loss": 0.8475, "step": 52150 }, { "epoch": 0.9157464140873259, "grad_norm": 0.09195817309164189, "learning_rate": 0.00018676630864389508, "loss": 0.8542, "step": 52160 }, { "epoch": 0.9159219789673274, "grad_norm": 0.05982592856083625, "learning_rate": 0.0001867607420969522, "loss": 0.8526, "step": 52170 }, { "epoch": 0.9160975438473288, "grad_norm": 0.07149588907916492, "learning_rate": 0.00018675517446340426, "loss": 0.8482, "step": 52180 }, { "epoch": 0.9162731087273301, "grad_norm": 0.08561797680253087, "learning_rate": 0.00018674960574332182, "loss": 0.8425, "step": 52190 }, { "epoch": 0.9164486736073316, "grad_norm": 0.07025912956294914, "learning_rate": 0.00018674403593677542, "loss": 0.8604, "step": 52200 }, { "epoch": 0.916624238487333, "grad_norm": 0.05957180592209723, "learning_rate": 0.0001867384650438357, "loss": 0.8562, "step": 52210 }, { "epoch": 0.9167998033673344, "grad_norm": 0.07559596662279224, "learning_rate": 0.00018673289306457322, "loss": 0.847, "step": 52220 }, { "epoch": 0.9169753682473358, "grad_norm": 0.054846149774135565, "learning_rate": 0.00018672731999905848, "loss": 0.8533, "step": 52230 }, { "epoch": 0.9171509331273372, "grad_norm": 0.09725345824588667, "learning_rate": 0.00018672174584736221, "loss": 0.854, "step": 52240 }, { "epoch": 0.9173264980073386, "grad_norm": 0.05208071014157818, "learning_rate": 0.00018671617060955499, "loss": 0.8494, "step": 52250 }, { "epoch": 0.91750206288734, "grad_norm": 0.06516121981306885, "learning_rate": 0.00018671059428570743, "loss": 0.8559, "step": 52260 }, { "epoch": 0.9176776277673414, "grad_norm": 0.06704205479096088, "learning_rate": 0.00018670501687589023, "loss": 0.8522, "step": 52270 }, { "epoch": 0.9178531926473428, "grad_norm": 0.1143323543513422, "learning_rate": 0.00018669943838017406, "loss": 0.8499, "step": 52280 }, { "epoch": 0.9180287575273443, "grad_norm": 0.06985447247199748, "learning_rate": 0.00018669385879862958, "loss": 0.8553, "step": 52290 }, { "epoch": 0.9182043224073456, "grad_norm": 0.06399618804346029, "learning_rate": 0.00018668827813132744, "loss": 0.8478, "step": 52300 }, { "epoch": 0.918379887287347, "grad_norm": 0.05594552247362219, "learning_rate": 0.00018668269637833846, "loss": 0.8505, "step": 52310 }, { "epoch": 0.9185554521673485, "grad_norm": 0.0626233503723022, "learning_rate": 0.00018667711353973325, "loss": 0.8485, "step": 52320 }, { "epoch": 0.9187310170473498, "grad_norm": 0.115000110876983, "learning_rate": 0.0001866715296155826, "loss": 0.8483, "step": 52330 }, { "epoch": 0.9189065819273513, "grad_norm": 0.061610844659925716, "learning_rate": 0.00018666594460595729, "loss": 0.8562, "step": 52340 }, { "epoch": 0.9190821468073527, "grad_norm": 0.06769035752719643, "learning_rate": 0.000186660358510928, "loss": 0.8502, "step": 52350 }, { "epoch": 0.919257711687354, "grad_norm": 0.08160933652269628, "learning_rate": 0.00018665477133056555, "loss": 0.8542, "step": 52360 }, { "epoch": 0.9194332765673555, "grad_norm": 0.06486866876619511, "learning_rate": 0.00018664918306494075, "loss": 0.8503, "step": 52370 }, { "epoch": 0.9196088414473569, "grad_norm": 0.056308127154514574, "learning_rate": 0.0001866435937141244, "loss": 0.8535, "step": 52380 }, { "epoch": 0.9197844063273583, "grad_norm": 0.07429429376100193, "learning_rate": 0.00018663800327818725, "loss": 0.8624, "step": 52390 }, { "epoch": 0.9199599712073597, "grad_norm": 0.06977378954071706, "learning_rate": 0.00018663241175720022, "loss": 0.8521, "step": 52400 }, { "epoch": 0.9201355360873611, "grad_norm": 0.048465608596834416, "learning_rate": 0.00018662681915123413, "loss": 0.8551, "step": 52410 }, { "epoch": 0.9203111009673625, "grad_norm": 0.047029091623290074, "learning_rate": 0.00018662122546035981, "loss": 0.8452, "step": 52420 }, { "epoch": 0.9204866658473639, "grad_norm": 0.0854078755673119, "learning_rate": 0.00018661563068464814, "loss": 0.8473, "step": 52430 }, { "epoch": 0.9206622307273653, "grad_norm": 0.08417946525633584, "learning_rate": 0.00018661003482417008, "loss": 0.8479, "step": 52440 }, { "epoch": 0.9208377956073667, "grad_norm": 0.064585257171811, "learning_rate": 0.00018660443787899645, "loss": 0.8552, "step": 52450 }, { "epoch": 0.9210133604873681, "grad_norm": 0.060016060643486774, "learning_rate": 0.00018659883984919817, "loss": 0.8496, "step": 52460 }, { "epoch": 0.9211889253673695, "grad_norm": 0.05490352423434604, "learning_rate": 0.00018659324073484618, "loss": 0.8514, "step": 52470 }, { "epoch": 0.9213644902473709, "grad_norm": 0.05534406922999104, "learning_rate": 0.00018658764053601146, "loss": 0.8553, "step": 52480 }, { "epoch": 0.9215400551273724, "grad_norm": 0.06739847464312901, "learning_rate": 0.00018658203925276488, "loss": 0.8543, "step": 52490 }, { "epoch": 0.9217156200073737, "grad_norm": 0.0642445125513078, "learning_rate": 0.0001865764368851775, "loss": 0.8545, "step": 52500 }, { "epoch": 0.9218911848873751, "grad_norm": 0.06617560007237798, "learning_rate": 0.00018657083343332024, "loss": 0.8489, "step": 52510 }, { "epoch": 0.9220667497673766, "grad_norm": 0.0627436530575247, "learning_rate": 0.00018656522889726413, "loss": 0.8451, "step": 52520 }, { "epoch": 0.9222423146473779, "grad_norm": 0.0572444305230071, "learning_rate": 0.0001865596232770802, "loss": 0.8556, "step": 52530 }, { "epoch": 0.9224178795273793, "grad_norm": 0.06124502415445265, "learning_rate": 0.0001865540165728394, "loss": 0.8526, "step": 52540 }, { "epoch": 0.9225934444073808, "grad_norm": 0.06357652859601352, "learning_rate": 0.00018654840878461283, "loss": 0.8566, "step": 52550 }, { "epoch": 0.9227690092873821, "grad_norm": 0.05407334702176449, "learning_rate": 0.0001865427999124715, "loss": 0.8602, "step": 52560 }, { "epoch": 0.9229445741673835, "grad_norm": 0.05730982697747094, "learning_rate": 0.00018653718995648657, "loss": 0.848, "step": 52570 }, { "epoch": 0.923120139047385, "grad_norm": 0.06802857569895472, "learning_rate": 0.000186531578916729, "loss": 0.858, "step": 52580 }, { "epoch": 0.9232957039273864, "grad_norm": 0.061650603649277415, "learning_rate": 0.00018652596679326995, "loss": 0.8531, "step": 52590 }, { "epoch": 0.9234712688073877, "grad_norm": 0.05546285941775846, "learning_rate": 0.00018652035358618049, "loss": 0.8555, "step": 52600 }, { "epoch": 0.9236468336873892, "grad_norm": 0.06077111367217984, "learning_rate": 0.00018651473929553179, "loss": 0.8511, "step": 52610 }, { "epoch": 0.9238223985673906, "grad_norm": 0.06595077701663499, "learning_rate": 0.00018650912392139491, "loss": 0.8487, "step": 52620 }, { "epoch": 0.9239979634473919, "grad_norm": 0.07486354344581231, "learning_rate": 0.00018650350746384108, "loss": 0.8525, "step": 52630 }, { "epoch": 0.9241735283273934, "grad_norm": 0.05326577515516487, "learning_rate": 0.00018649788992294142, "loss": 0.8543, "step": 52640 }, { "epoch": 0.9243490932073948, "grad_norm": 0.10552815903550246, "learning_rate": 0.0001864922712987671, "loss": 0.8451, "step": 52650 }, { "epoch": 0.9245246580873961, "grad_norm": 0.061556157793090725, "learning_rate": 0.0001864866515913893, "loss": 0.8531, "step": 52660 }, { "epoch": 0.9247002229673976, "grad_norm": 0.05995617920383463, "learning_rate": 0.0001864810308008793, "loss": 0.8515, "step": 52670 }, { "epoch": 0.924875787847399, "grad_norm": 0.045314775642775244, "learning_rate": 0.0001864754089273082, "loss": 0.858, "step": 52680 }, { "epoch": 0.9250513527274004, "grad_norm": 0.06332290020978949, "learning_rate": 0.0001864697859707473, "loss": 0.8542, "step": 52690 }, { "epoch": 0.9252269176074018, "grad_norm": 0.05300400333697996, "learning_rate": 0.00018646416193126783, "loss": 0.8574, "step": 52700 }, { "epoch": 0.9254024824874032, "grad_norm": 0.050860002156386086, "learning_rate": 0.00018645853680894107, "loss": 0.8493, "step": 52710 }, { "epoch": 0.9255780473674047, "grad_norm": 0.08232432974338241, "learning_rate": 0.00018645291060383825, "loss": 0.8463, "step": 52720 }, { "epoch": 0.925753612247406, "grad_norm": 0.06951914859635001, "learning_rate": 0.0001864472833160307, "loss": 0.8494, "step": 52730 }, { "epoch": 0.9259291771274074, "grad_norm": 0.07185248061447896, "learning_rate": 0.00018644165494558968, "loss": 0.8589, "step": 52740 }, { "epoch": 0.9261047420074089, "grad_norm": 0.06074647979859754, "learning_rate": 0.0001864360254925865, "loss": 0.853, "step": 52750 }, { "epoch": 0.9262803068874103, "grad_norm": 0.09250280504103318, "learning_rate": 0.00018643039495709252, "loss": 0.8531, "step": 52760 }, { "epoch": 0.9264558717674116, "grad_norm": 0.060545783661831984, "learning_rate": 0.00018642476333917906, "loss": 0.8533, "step": 52770 }, { "epoch": 0.9266314366474131, "grad_norm": 0.06100195944831669, "learning_rate": 0.00018641913063891747, "loss": 0.8504, "step": 52780 }, { "epoch": 0.9268070015274145, "grad_norm": 0.054341703083092045, "learning_rate": 0.0001864134968563791, "loss": 0.8534, "step": 52790 }, { "epoch": 0.9269825664074158, "grad_norm": 0.052229302309438404, "learning_rate": 0.0001864078619916354, "loss": 0.8552, "step": 52800 }, { "epoch": 0.9271581312874173, "grad_norm": 0.05071248319481811, "learning_rate": 0.0001864022260447577, "loss": 0.8545, "step": 52810 }, { "epoch": 0.9273336961674187, "grad_norm": 0.065340259361956, "learning_rate": 0.00018639658901581748, "loss": 0.855, "step": 52820 }, { "epoch": 0.92750926104742, "grad_norm": 0.05289256323777533, "learning_rate": 0.00018639095090488606, "loss": 0.8607, "step": 52830 }, { "epoch": 0.9276848259274215, "grad_norm": 0.11482795970416908, "learning_rate": 0.00018638531171203494, "loss": 0.8547, "step": 52840 }, { "epoch": 0.9278603908074229, "grad_norm": 0.09636992999317114, "learning_rate": 0.00018637967143733555, "loss": 0.8475, "step": 52850 }, { "epoch": 0.9280359556874243, "grad_norm": 0.09525292468686201, "learning_rate": 0.00018637403008085935, "loss": 0.8514, "step": 52860 }, { "epoch": 0.9282115205674257, "grad_norm": 0.0873736054843577, "learning_rate": 0.00018636838764267782, "loss": 0.8577, "step": 52870 }, { "epoch": 0.9283870854474271, "grad_norm": 0.09000526149286751, "learning_rate": 0.0001863627441228625, "loss": 0.8587, "step": 52880 }, { "epoch": 0.9285626503274285, "grad_norm": 0.06993243806401504, "learning_rate": 0.00018635709952148484, "loss": 0.8463, "step": 52890 }, { "epoch": 0.9287382152074299, "grad_norm": 0.06881223524912469, "learning_rate": 0.00018635145383861634, "loss": 0.8443, "step": 52900 }, { "epoch": 0.9289137800874313, "grad_norm": 0.06078141111334846, "learning_rate": 0.0001863458070743286, "loss": 0.8556, "step": 52910 }, { "epoch": 0.9290893449674327, "grad_norm": 0.0633397999146511, "learning_rate": 0.0001863401592286931, "loss": 0.8564, "step": 52920 }, { "epoch": 0.9292649098474342, "grad_norm": 0.047030001176000286, "learning_rate": 0.00018633451030178143, "loss": 0.8507, "step": 52930 }, { "epoch": 0.9294404747274355, "grad_norm": 0.05409209278655873, "learning_rate": 0.00018632886029366515, "loss": 0.8484, "step": 52940 }, { "epoch": 0.9296160396074369, "grad_norm": 0.05731771754108553, "learning_rate": 0.0001863232092044159, "loss": 0.8559, "step": 52950 }, { "epoch": 0.9297916044874384, "grad_norm": 0.06289936496465522, "learning_rate": 0.0001863175570341052, "loss": 0.8492, "step": 52960 }, { "epoch": 0.9299671693674397, "grad_norm": 0.05761260188624836, "learning_rate": 0.0001863119037828047, "loss": 0.8451, "step": 52970 }, { "epoch": 0.9301427342474411, "grad_norm": 0.060571632662124356, "learning_rate": 0.00018630624945058604, "loss": 0.8511, "step": 52980 }, { "epoch": 0.9303182991274426, "grad_norm": 0.06875302608137962, "learning_rate": 0.00018630059403752084, "loss": 0.855, "step": 52990 }, { "epoch": 0.9304938640074439, "grad_norm": 0.06422783737825864, "learning_rate": 0.00018629493754368078, "loss": 0.8579, "step": 53000 }, { "epoch": 0.9306694288874453, "grad_norm": 0.07847203680506977, "learning_rate": 0.00018628927996913752, "loss": 0.8544, "step": 53010 }, { "epoch": 0.9308449937674468, "grad_norm": 0.045521412305349236, "learning_rate": 0.00018628362131396274, "loss": 0.8483, "step": 53020 }, { "epoch": 0.9310205586474481, "grad_norm": 0.10020794378759817, "learning_rate": 0.0001862779615782281, "loss": 0.8593, "step": 53030 }, { "epoch": 0.9311961235274495, "grad_norm": 0.08643991627970603, "learning_rate": 0.00018627230076200538, "loss": 0.8583, "step": 53040 }, { "epoch": 0.931371688407451, "grad_norm": 0.06405156262170593, "learning_rate": 0.00018626663886536627, "loss": 0.8481, "step": 53050 }, { "epoch": 0.9315472532874524, "grad_norm": 0.09661888021791752, "learning_rate": 0.00018626097588838247, "loss": 0.8503, "step": 53060 }, { "epoch": 0.9317228181674538, "grad_norm": 0.052934344194335785, "learning_rate": 0.00018625531183112578, "loss": 0.8585, "step": 53070 }, { "epoch": 0.9318983830474552, "grad_norm": 0.08824453175264592, "learning_rate": 0.00018624964669366795, "loss": 0.8494, "step": 53080 }, { "epoch": 0.9320739479274566, "grad_norm": 0.05887456138387104, "learning_rate": 0.00018624398047608075, "loss": 0.8542, "step": 53090 }, { "epoch": 0.932249512807458, "grad_norm": 0.06436235901292511, "learning_rate": 0.000186238313178436, "loss": 0.8541, "step": 53100 }, { "epoch": 0.9324250776874594, "grad_norm": 0.07771575260231488, "learning_rate": 0.00018623264480080548, "loss": 0.8414, "step": 53110 }, { "epoch": 0.9326006425674608, "grad_norm": 0.05298388726971687, "learning_rate": 0.00018622697534326102, "loss": 0.8536, "step": 53120 }, { "epoch": 0.9327762074474623, "grad_norm": 0.05711778790601906, "learning_rate": 0.00018622130480587442, "loss": 0.85, "step": 53130 }, { "epoch": 0.9329517723274636, "grad_norm": 0.05921473950070494, "learning_rate": 0.0001862156331887176, "loss": 0.8514, "step": 53140 }, { "epoch": 0.933127337207465, "grad_norm": 0.10971490506829766, "learning_rate": 0.00018620996049186235, "loss": 0.8539, "step": 53150 }, { "epoch": 0.9333029020874665, "grad_norm": 0.08989465684946801, "learning_rate": 0.00018620428671538056, "loss": 0.8449, "step": 53160 }, { "epoch": 0.9334784669674678, "grad_norm": 0.05870288484527035, "learning_rate": 0.00018619861185934416, "loss": 0.8541, "step": 53170 }, { "epoch": 0.9336540318474692, "grad_norm": 0.07474682224435653, "learning_rate": 0.000186192935923825, "loss": 0.851, "step": 53180 }, { "epoch": 0.9338295967274707, "grad_norm": 0.057768874607653335, "learning_rate": 0.00018618725890889504, "loss": 0.8541, "step": 53190 }, { "epoch": 0.934005161607472, "grad_norm": 0.05308329857234633, "learning_rate": 0.00018618158081462613, "loss": 0.8543, "step": 53200 }, { "epoch": 0.9341807264874734, "grad_norm": 0.059126228287055695, "learning_rate": 0.00018617590164109028, "loss": 0.8455, "step": 53210 }, { "epoch": 0.9343562913674749, "grad_norm": 0.05655763787829159, "learning_rate": 0.00018617022138835946, "loss": 0.857, "step": 53220 }, { "epoch": 0.9345318562474763, "grad_norm": 0.08429012011807922, "learning_rate": 0.00018616454005650562, "loss": 0.8522, "step": 53230 }, { "epoch": 0.9347074211274776, "grad_norm": 0.0762823782775509, "learning_rate": 0.0001861588576456007, "loss": 0.8613, "step": 53240 }, { "epoch": 0.9348829860074791, "grad_norm": 0.08512664281103186, "learning_rate": 0.00018615317415571675, "loss": 0.8439, "step": 53250 }, { "epoch": 0.9350585508874805, "grad_norm": 0.11196634564362304, "learning_rate": 0.00018614748958692575, "loss": 0.855, "step": 53260 }, { "epoch": 0.9352341157674818, "grad_norm": 0.0637948750944818, "learning_rate": 0.00018614180393929978, "loss": 0.8489, "step": 53270 }, { "epoch": 0.9354096806474833, "grad_norm": 0.060337310996855825, "learning_rate": 0.00018613611721291079, "loss": 0.8487, "step": 53280 }, { "epoch": 0.9355852455274847, "grad_norm": 0.047710088107510325, "learning_rate": 0.0001861304294078309, "loss": 0.8516, "step": 53290 }, { "epoch": 0.935760810407486, "grad_norm": 0.06430674545917486, "learning_rate": 0.00018612474052413214, "loss": 0.8512, "step": 53300 }, { "epoch": 0.9359363752874875, "grad_norm": 0.0576265846839811, "learning_rate": 0.0001861190505618866, "loss": 0.8543, "step": 53310 }, { "epoch": 0.9361119401674889, "grad_norm": 0.06616870611682764, "learning_rate": 0.00018611335952116643, "loss": 0.8486, "step": 53320 }, { "epoch": 0.9362875050474903, "grad_norm": 0.07206016524489993, "learning_rate": 0.00018610766740204365, "loss": 0.8535, "step": 53330 }, { "epoch": 0.9364630699274917, "grad_norm": 0.059065362503811676, "learning_rate": 0.00018610197420459036, "loss": 0.8543, "step": 53340 }, { "epoch": 0.9366386348074931, "grad_norm": 0.0593593348028111, "learning_rate": 0.0001860962799288788, "loss": 0.8452, "step": 53350 }, { "epoch": 0.9368141996874945, "grad_norm": 0.07901411604625812, "learning_rate": 0.00018609058457498106, "loss": 0.8501, "step": 53360 }, { "epoch": 0.9369897645674959, "grad_norm": 0.05781975285094038, "learning_rate": 0.00018608488814296931, "loss": 0.8613, "step": 53370 }, { "epoch": 0.9371653294474973, "grad_norm": 0.10291495742599677, "learning_rate": 0.00018607919063291572, "loss": 0.8529, "step": 53380 }, { "epoch": 0.9373408943274987, "grad_norm": 0.05888431010241856, "learning_rate": 0.0001860734920448925, "loss": 0.8504, "step": 53390 }, { "epoch": 0.9375164592075002, "grad_norm": 0.05964158250294826, "learning_rate": 0.00018606779237897178, "loss": 0.8576, "step": 53400 }, { "epoch": 0.9376920240875015, "grad_norm": 0.050981753881048426, "learning_rate": 0.00018606209163522585, "loss": 0.8598, "step": 53410 }, { "epoch": 0.9378675889675029, "grad_norm": 0.07411119415150541, "learning_rate": 0.00018605638981372693, "loss": 0.8553, "step": 53420 }, { "epoch": 0.9380431538475044, "grad_norm": 0.06676003558843394, "learning_rate": 0.00018605068691454724, "loss": 0.858, "step": 53430 }, { "epoch": 0.9382187187275057, "grad_norm": 0.05707785842502874, "learning_rate": 0.00018604498293775906, "loss": 0.8531, "step": 53440 }, { "epoch": 0.9383942836075072, "grad_norm": 0.08261143100189929, "learning_rate": 0.00018603927788343462, "loss": 0.8507, "step": 53450 }, { "epoch": 0.9385698484875086, "grad_norm": 0.06949460310879552, "learning_rate": 0.00018603357175164624, "loss": 0.8468, "step": 53460 }, { "epoch": 0.9387454133675099, "grad_norm": 0.06286742420292224, "learning_rate": 0.00018602786454246622, "loss": 0.843, "step": 53470 }, { "epoch": 0.9389209782475114, "grad_norm": 0.06195322748459346, "learning_rate": 0.00018602215625596687, "loss": 0.8546, "step": 53480 }, { "epoch": 0.9390965431275128, "grad_norm": 0.07864660314444545, "learning_rate": 0.00018601644689222047, "loss": 0.8625, "step": 53490 }, { "epoch": 0.9392721080075142, "grad_norm": 0.06908025779358791, "learning_rate": 0.00018601073645129943, "loss": 0.851, "step": 53500 }, { "epoch": 0.9394476728875156, "grad_norm": 0.06386970298142383, "learning_rate": 0.00018600502493327603, "loss": 0.8549, "step": 53510 }, { "epoch": 0.939623237767517, "grad_norm": 0.08629233935476499, "learning_rate": 0.00018599931233822273, "loss": 0.8573, "step": 53520 }, { "epoch": 0.9397988026475184, "grad_norm": 0.0670888055577602, "learning_rate": 0.0001859935986662118, "loss": 0.8489, "step": 53530 }, { "epoch": 0.9399743675275198, "grad_norm": 0.0690897993475298, "learning_rate": 0.0001859878839173157, "loss": 0.8452, "step": 53540 }, { "epoch": 0.9401499324075212, "grad_norm": 0.07305620813712654, "learning_rate": 0.00018598216809160683, "loss": 0.8505, "step": 53550 }, { "epoch": 0.9403254972875226, "grad_norm": 0.05712524218761124, "learning_rate": 0.00018597645118915762, "loss": 0.8527, "step": 53560 }, { "epoch": 0.940501062167524, "grad_norm": 0.06925092978891154, "learning_rate": 0.00018597073321004046, "loss": 0.8555, "step": 53570 }, { "epoch": 0.9406766270475254, "grad_norm": 0.06234379525228078, "learning_rate": 0.00018596501415432781, "loss": 0.851, "step": 53580 }, { "epoch": 0.9408521919275268, "grad_norm": 0.06176448239982641, "learning_rate": 0.0001859592940220922, "loss": 0.8518, "step": 53590 }, { "epoch": 0.9410277568075283, "grad_norm": 0.06187415056590022, "learning_rate": 0.00018595357281340601, "loss": 0.8535, "step": 53600 }, { "epoch": 0.9412033216875296, "grad_norm": 0.06282952954781876, "learning_rate": 0.0001859478505283418, "loss": 0.853, "step": 53610 }, { "epoch": 0.941378886567531, "grad_norm": 0.058328737323775755, "learning_rate": 0.00018594212716697202, "loss": 0.8531, "step": 53620 }, { "epoch": 0.9415544514475325, "grad_norm": 0.05853801671200862, "learning_rate": 0.0001859364027293692, "loss": 0.8557, "step": 53630 }, { "epoch": 0.9417300163275338, "grad_norm": 0.06946704587874246, "learning_rate": 0.00018593067721560592, "loss": 0.8489, "step": 53640 }, { "epoch": 0.9419055812075352, "grad_norm": 0.05480089898797709, "learning_rate": 0.00018592495062575465, "loss": 0.8521, "step": 53650 }, { "epoch": 0.9420811460875367, "grad_norm": 0.05450382944626004, "learning_rate": 0.00018591922295988799, "loss": 0.8489, "step": 53660 }, { "epoch": 0.942256710967538, "grad_norm": 0.05349484467820491, "learning_rate": 0.00018591349421807852, "loss": 0.8484, "step": 53670 }, { "epoch": 0.9424322758475394, "grad_norm": 0.05031196838143375, "learning_rate": 0.0001859077644003988, "loss": 0.8556, "step": 53680 }, { "epoch": 0.9426078407275409, "grad_norm": 0.06816853456925509, "learning_rate": 0.00018590203350692138, "loss": 0.8471, "step": 53690 }, { "epoch": 0.9427834056075423, "grad_norm": 0.07050087788521052, "learning_rate": 0.00018589630153771896, "loss": 0.8528, "step": 53700 }, { "epoch": 0.9429589704875436, "grad_norm": 0.06468522008614216, "learning_rate": 0.00018589056849286414, "loss": 0.8487, "step": 53710 }, { "epoch": 0.9431345353675451, "grad_norm": 0.057937880956632326, "learning_rate": 0.00018588483437242953, "loss": 0.8566, "step": 53720 }, { "epoch": 0.9433101002475465, "grad_norm": 0.09630393964486945, "learning_rate": 0.00018587909917648784, "loss": 0.8539, "step": 53730 }, { "epoch": 0.9434856651275478, "grad_norm": 0.05994197846211531, "learning_rate": 0.00018587336290511164, "loss": 0.8518, "step": 53740 }, { "epoch": 0.9436612300075493, "grad_norm": 0.06616697370623892, "learning_rate": 0.0001858676255583737, "loss": 0.8521, "step": 53750 }, { "epoch": 0.9438367948875507, "grad_norm": 0.06129151863652551, "learning_rate": 0.00018586188713634667, "loss": 0.8493, "step": 53760 }, { "epoch": 0.944012359767552, "grad_norm": 0.09484877699954966, "learning_rate": 0.00018585614763910329, "loss": 0.8487, "step": 53770 }, { "epoch": 0.9441879246475535, "grad_norm": 0.07234781289816868, "learning_rate": 0.0001858504070667162, "loss": 0.8589, "step": 53780 }, { "epoch": 0.9443634895275549, "grad_norm": 0.05766179244566343, "learning_rate": 0.00018584466541925825, "loss": 0.8516, "step": 53790 }, { "epoch": 0.9445390544075564, "grad_norm": 0.08479379166612562, "learning_rate": 0.0001858389226968021, "loss": 0.8479, "step": 53800 }, { "epoch": 0.9447146192875577, "grad_norm": 0.07212385086767178, "learning_rate": 0.00018583317889942053, "loss": 0.8505, "step": 53810 }, { "epoch": 0.9448901841675591, "grad_norm": 0.06990406462922998, "learning_rate": 0.00018582743402718635, "loss": 0.8543, "step": 53820 }, { "epoch": 0.9450657490475606, "grad_norm": 0.053689948869049736, "learning_rate": 0.00018582168808017232, "loss": 0.8563, "step": 53830 }, { "epoch": 0.9452413139275619, "grad_norm": 0.056469375464078604, "learning_rate": 0.00018581594105845123, "loss": 0.8496, "step": 53840 }, { "epoch": 0.9454168788075633, "grad_norm": 0.06498431017811772, "learning_rate": 0.0001858101929620959, "loss": 0.8449, "step": 53850 }, { "epoch": 0.9455924436875648, "grad_norm": 0.061827404072297615, "learning_rate": 0.00018580444379117923, "loss": 0.8456, "step": 53860 }, { "epoch": 0.9457680085675662, "grad_norm": 0.09084074943204759, "learning_rate": 0.00018579869354577396, "loss": 0.8484, "step": 53870 }, { "epoch": 0.9459435734475675, "grad_norm": 0.06529414863926848, "learning_rate": 0.000185792942225953, "loss": 0.8533, "step": 53880 }, { "epoch": 0.946119138327569, "grad_norm": 0.0666975039986091, "learning_rate": 0.0001857871898317892, "loss": 0.8532, "step": 53890 }, { "epoch": 0.9462947032075704, "grad_norm": 0.06852919544980718, "learning_rate": 0.00018578143636335549, "loss": 0.8453, "step": 53900 }, { "epoch": 0.9464702680875717, "grad_norm": 0.06512431380162538, "learning_rate": 0.0001857756818207247, "loss": 0.8482, "step": 53910 }, { "epoch": 0.9466458329675732, "grad_norm": 0.05353773163831803, "learning_rate": 0.0001857699262039698, "loss": 0.8475, "step": 53920 }, { "epoch": 0.9468213978475746, "grad_norm": 0.05643220526997353, "learning_rate": 0.00018576416951316364, "loss": 0.8553, "step": 53930 }, { "epoch": 0.9469969627275759, "grad_norm": 0.06170024213185728, "learning_rate": 0.0001857584117483792, "loss": 0.8561, "step": 53940 }, { "epoch": 0.9471725276075774, "grad_norm": 0.05894453670279045, "learning_rate": 0.00018575265290968955, "loss": 0.8533, "step": 53950 }, { "epoch": 0.9473480924875788, "grad_norm": 0.0524419208916826, "learning_rate": 0.00018574689299716746, "loss": 0.8582, "step": 53960 }, { "epoch": 0.9475236573675802, "grad_norm": 0.0581366022017881, "learning_rate": 0.00018574113201088598, "loss": 0.8502, "step": 53970 }, { "epoch": 0.9476992222475816, "grad_norm": 0.07667456399782642, "learning_rate": 0.00018573536995091815, "loss": 0.8496, "step": 53980 }, { "epoch": 0.947874787127583, "grad_norm": 0.056329860137378694, "learning_rate": 0.00018572960681733695, "loss": 0.8525, "step": 53990 }, { "epoch": 0.9480503520075844, "grad_norm": 0.06094916864712991, "learning_rate": 0.00018572384261021538, "loss": 0.8527, "step": 54000 }, { "epoch": 0.9482259168875858, "grad_norm": 0.04871483392827813, "learning_rate": 0.00018571807732962648, "loss": 0.8603, "step": 54010 }, { "epoch": 0.9484014817675872, "grad_norm": 0.060722613845225516, "learning_rate": 0.00018571231097564333, "loss": 0.8588, "step": 54020 }, { "epoch": 0.9485770466475886, "grad_norm": 0.054809983642605795, "learning_rate": 0.00018570654354833893, "loss": 0.8555, "step": 54030 }, { "epoch": 0.94875261152759, "grad_norm": 0.06313626418320133, "learning_rate": 0.0001857007750477864, "loss": 0.8574, "step": 54040 }, { "epoch": 0.9489281764075914, "grad_norm": 0.08154454964409878, "learning_rate": 0.00018569500547405883, "loss": 0.8529, "step": 54050 }, { "epoch": 0.9491037412875928, "grad_norm": 0.07563764218638472, "learning_rate": 0.0001856892348272293, "loss": 0.8521, "step": 54060 }, { "epoch": 0.9492793061675943, "grad_norm": 0.05583690686888979, "learning_rate": 0.00018568346310737094, "loss": 0.8504, "step": 54070 }, { "epoch": 0.9494548710475956, "grad_norm": 0.06218101565954428, "learning_rate": 0.00018567769031455685, "loss": 0.8514, "step": 54080 }, { "epoch": 0.949630435927597, "grad_norm": 0.07477745365436439, "learning_rate": 0.00018567191644886023, "loss": 0.8595, "step": 54090 }, { "epoch": 0.9498060008075985, "grad_norm": 0.05087197673976684, "learning_rate": 0.00018566614151035418, "loss": 0.8401, "step": 54100 }, { "epoch": 0.9499815656875998, "grad_norm": 0.05719067534983043, "learning_rate": 0.0001856603654991119, "loss": 0.8565, "step": 54110 }, { "epoch": 0.9501571305676012, "grad_norm": 0.06247082065332925, "learning_rate": 0.00018565458841520658, "loss": 0.8518, "step": 54120 }, { "epoch": 0.9503326954476027, "grad_norm": 0.09013147914560854, "learning_rate": 0.00018564881025871137, "loss": 0.8481, "step": 54130 }, { "epoch": 0.950508260327604, "grad_norm": 0.05210122256084442, "learning_rate": 0.00018564303102969952, "loss": 0.857, "step": 54140 }, { "epoch": 0.9506838252076054, "grad_norm": 0.06274604484734791, "learning_rate": 0.00018563725072824426, "loss": 0.851, "step": 54150 }, { "epoch": 0.9508593900876069, "grad_norm": 0.0558571741402602, "learning_rate": 0.0001856314693544188, "loss": 0.8432, "step": 54160 }, { "epoch": 0.9510349549676083, "grad_norm": 0.05962600239569053, "learning_rate": 0.0001856256869082964, "loss": 0.8493, "step": 54170 }, { "epoch": 0.9512105198476097, "grad_norm": 0.08939146073703436, "learning_rate": 0.00018561990338995032, "loss": 0.8587, "step": 54180 }, { "epoch": 0.9513860847276111, "grad_norm": 0.06901872113649257, "learning_rate": 0.00018561411879945388, "loss": 0.855, "step": 54190 }, { "epoch": 0.9515616496076125, "grad_norm": 0.05574194587925945, "learning_rate": 0.00018560833313688028, "loss": 0.8526, "step": 54200 }, { "epoch": 0.951737214487614, "grad_norm": 0.08030951701240316, "learning_rate": 0.00018560254640230295, "loss": 0.8515, "step": 54210 }, { "epoch": 0.9519127793676153, "grad_norm": 0.058178354185772295, "learning_rate": 0.00018559675859579507, "loss": 0.8458, "step": 54220 }, { "epoch": 0.9520883442476167, "grad_norm": 0.049132377136920945, "learning_rate": 0.00018559096971743012, "loss": 0.853, "step": 54230 }, { "epoch": 0.9522639091276182, "grad_norm": 0.06534712095312196, "learning_rate": 0.00018558517976728135, "loss": 0.8476, "step": 54240 }, { "epoch": 0.9524394740076195, "grad_norm": 0.07729234952964717, "learning_rate": 0.00018557938874542214, "loss": 0.8539, "step": 54250 }, { "epoch": 0.9526150388876209, "grad_norm": 0.06239805454638391, "learning_rate": 0.00018557359665192586, "loss": 0.8521, "step": 54260 }, { "epoch": 0.9527906037676224, "grad_norm": 0.07006186060052604, "learning_rate": 0.00018556780348686592, "loss": 0.8482, "step": 54270 }, { "epoch": 0.9529661686476237, "grad_norm": 0.0685217225777155, "learning_rate": 0.00018556200925031564, "loss": 0.8465, "step": 54280 }, { "epoch": 0.9531417335276251, "grad_norm": 0.07819626186195873, "learning_rate": 0.00018555621394234853, "loss": 0.8572, "step": 54290 }, { "epoch": 0.9533172984076266, "grad_norm": 0.06911111388868735, "learning_rate": 0.00018555041756303803, "loss": 0.8558, "step": 54300 }, { "epoch": 0.953492863287628, "grad_norm": 0.07701689991933643, "learning_rate": 0.0001855446201124575, "loss": 0.8511, "step": 54310 }, { "epoch": 0.9536684281676293, "grad_norm": 0.04527688378177501, "learning_rate": 0.0001855388215906804, "loss": 0.855, "step": 54320 }, { "epoch": 0.9538439930476308, "grad_norm": 0.05829381617578348, "learning_rate": 0.00018553302199778027, "loss": 0.8575, "step": 54330 }, { "epoch": 0.9540195579276322, "grad_norm": 0.04960008216162136, "learning_rate": 0.00018552722133383055, "loss": 0.8488, "step": 54340 }, { "epoch": 0.9541951228076335, "grad_norm": 0.0802672650511748, "learning_rate": 0.0001855214195989047, "loss": 0.8575, "step": 54350 }, { "epoch": 0.954370687687635, "grad_norm": 0.05198426213321711, "learning_rate": 0.00018551561679307633, "loss": 0.8448, "step": 54360 }, { "epoch": 0.9545462525676364, "grad_norm": 0.055178446741323814, "learning_rate": 0.00018550981291641882, "loss": 0.8437, "step": 54370 }, { "epoch": 0.9547218174476377, "grad_norm": 0.0647293447298661, "learning_rate": 0.00018550400796900582, "loss": 0.8462, "step": 54380 }, { "epoch": 0.9548973823276392, "grad_norm": 0.0649594347273618, "learning_rate": 0.0001854982019509109, "loss": 0.8462, "step": 54390 }, { "epoch": 0.9550729472076406, "grad_norm": 0.04450647974129818, "learning_rate": 0.00018549239486220748, "loss": 0.8561, "step": 54400 }, { "epoch": 0.9552485120876419, "grad_norm": 0.07206989843055972, "learning_rate": 0.00018548658670296925, "loss": 0.8461, "step": 54410 }, { "epoch": 0.9554240769676434, "grad_norm": 0.055369992810811966, "learning_rate": 0.00018548077747326978, "loss": 0.8551, "step": 54420 }, { "epoch": 0.9555996418476448, "grad_norm": 0.07209220630463675, "learning_rate": 0.0001854749671731827, "loss": 0.8548, "step": 54430 }, { "epoch": 0.9557752067276462, "grad_norm": 0.09849552757073331, "learning_rate": 0.00018546915580278157, "loss": 0.8547, "step": 54440 }, { "epoch": 0.9559507716076476, "grad_norm": 0.054070577651868165, "learning_rate": 0.00018546334336214, "loss": 0.8576, "step": 54450 }, { "epoch": 0.956126336487649, "grad_norm": 0.07287897873717422, "learning_rate": 0.00018545752985133177, "loss": 0.8476, "step": 54460 }, { "epoch": 0.9563019013676504, "grad_norm": 0.10064046825563364, "learning_rate": 0.0001854517152704304, "loss": 0.8549, "step": 54470 }, { "epoch": 0.9564774662476518, "grad_norm": 0.06871207292336907, "learning_rate": 0.00018544589961950962, "loss": 0.853, "step": 54480 }, { "epoch": 0.9566530311276532, "grad_norm": 0.05466980330390416, "learning_rate": 0.0001854400828986431, "loss": 0.855, "step": 54490 }, { "epoch": 0.9568285960076546, "grad_norm": 0.06795797993875703, "learning_rate": 0.00018543426510790457, "loss": 0.8543, "step": 54500 }, { "epoch": 0.957004160887656, "grad_norm": 0.06274678685720017, "learning_rate": 0.00018542844624736769, "loss": 0.8471, "step": 54510 }, { "epoch": 0.9571797257676574, "grad_norm": 0.059728414483256616, "learning_rate": 0.00018542262631710625, "loss": 0.8483, "step": 54520 }, { "epoch": 0.9573552906476589, "grad_norm": 0.06107976261254306, "learning_rate": 0.0001854168053171939, "loss": 0.8442, "step": 54530 }, { "epoch": 0.9575308555276603, "grad_norm": 0.06593359989469894, "learning_rate": 0.0001854109832477045, "loss": 0.8525, "step": 54540 }, { "epoch": 0.9577064204076616, "grad_norm": 0.05250797592266925, "learning_rate": 0.00018540516010871175, "loss": 0.8501, "step": 54550 }, { "epoch": 0.9578819852876631, "grad_norm": 0.06143394036182521, "learning_rate": 0.00018539933590028943, "loss": 0.8537, "step": 54560 }, { "epoch": 0.9580575501676645, "grad_norm": 0.0612258751030492, "learning_rate": 0.00018539351062251137, "loss": 0.8418, "step": 54570 }, { "epoch": 0.9582331150476658, "grad_norm": 0.050165479540366535, "learning_rate": 0.00018538768427545137, "loss": 0.8544, "step": 54580 }, { "epoch": 0.9584086799276673, "grad_norm": 0.05846749718776558, "learning_rate": 0.00018538185685918322, "loss": 0.8515, "step": 54590 }, { "epoch": 0.9585842448076687, "grad_norm": 0.05349466226121646, "learning_rate": 0.00018537602837378076, "loss": 0.8499, "step": 54600 }, { "epoch": 0.95875980968767, "grad_norm": 0.055316818296106775, "learning_rate": 0.00018537019881931786, "loss": 0.8502, "step": 54610 }, { "epoch": 0.9589353745676715, "grad_norm": 0.060605810154365974, "learning_rate": 0.00018536436819586836, "loss": 0.8492, "step": 54620 }, { "epoch": 0.9591109394476729, "grad_norm": 0.06392038305309096, "learning_rate": 0.00018535853650350617, "loss": 0.8456, "step": 54630 }, { "epoch": 0.9592865043276743, "grad_norm": 0.060862025814821336, "learning_rate": 0.00018535270374230514, "loss": 0.8578, "step": 54640 }, { "epoch": 0.9594620692076757, "grad_norm": 0.05492652794475265, "learning_rate": 0.00018534686991233915, "loss": 0.8528, "step": 54650 }, { "epoch": 0.9596376340876771, "grad_norm": 0.10049920740794822, "learning_rate": 0.00018534103501368222, "loss": 0.8549, "step": 54660 }, { "epoch": 0.9598131989676785, "grad_norm": 0.07549946622447878, "learning_rate": 0.0001853351990464082, "loss": 0.8468, "step": 54670 }, { "epoch": 0.95998876384768, "grad_norm": 0.06684809260188489, "learning_rate": 0.00018532936201059102, "loss": 0.8465, "step": 54680 }, { "epoch": 0.9601643287276813, "grad_norm": 0.05879738598479774, "learning_rate": 0.00018532352390630467, "loss": 0.8485, "step": 54690 }, { "epoch": 0.9603398936076827, "grad_norm": 0.05643508034506602, "learning_rate": 0.0001853176847336231, "loss": 0.8413, "step": 54700 }, { "epoch": 0.9605154584876842, "grad_norm": 0.054607381972910964, "learning_rate": 0.00018531184449262033, "loss": 0.8543, "step": 54710 }, { "epoch": 0.9606910233676855, "grad_norm": 0.0684252012627183, "learning_rate": 0.00018530600318337033, "loss": 0.8514, "step": 54720 }, { "epoch": 0.9608665882476869, "grad_norm": 0.048011149614720576, "learning_rate": 0.00018530016080594707, "loss": 0.8494, "step": 54730 }, { "epoch": 0.9610421531276884, "grad_norm": 0.06692847368599607, "learning_rate": 0.00018529431736042466, "loss": 0.8496, "step": 54740 }, { "epoch": 0.9612177180076897, "grad_norm": 0.07911500075453089, "learning_rate": 0.00018528847284687708, "loss": 0.8567, "step": 54750 }, { "epoch": 0.9613932828876911, "grad_norm": 0.05960608570571123, "learning_rate": 0.0001852826272653784, "loss": 0.8483, "step": 54760 }, { "epoch": 0.9615688477676926, "grad_norm": 0.07189012577310204, "learning_rate": 0.0001852767806160027, "loss": 0.8625, "step": 54770 }, { "epoch": 0.961744412647694, "grad_norm": 0.11558333901357626, "learning_rate": 0.000185270932898824, "loss": 0.8523, "step": 54780 }, { "epoch": 0.9619199775276953, "grad_norm": 0.0741711994790164, "learning_rate": 0.00018526508411391648, "loss": 0.8493, "step": 54790 }, { "epoch": 0.9620955424076968, "grad_norm": 0.0867529806092186, "learning_rate": 0.00018525923426135414, "loss": 0.8451, "step": 54800 }, { "epoch": 0.9622711072876982, "grad_norm": 0.07353928300276961, "learning_rate": 0.00018525338334121117, "loss": 0.8532, "step": 54810 }, { "epoch": 0.9624466721676995, "grad_norm": 0.05812702526478087, "learning_rate": 0.0001852475313535617, "loss": 0.8525, "step": 54820 }, { "epoch": 0.962622237047701, "grad_norm": 0.07202715521465064, "learning_rate": 0.00018524167829847985, "loss": 0.8464, "step": 54830 }, { "epoch": 0.9627978019277024, "grad_norm": 0.06603802762527142, "learning_rate": 0.0001852358241760398, "loss": 0.8491, "step": 54840 }, { "epoch": 0.9629733668077037, "grad_norm": 0.06783559127794969, "learning_rate": 0.0001852299689863157, "loss": 0.8523, "step": 54850 }, { "epoch": 0.9631489316877052, "grad_norm": 0.05753180361026002, "learning_rate": 0.00018522411272938174, "loss": 0.8507, "step": 54860 }, { "epoch": 0.9633244965677066, "grad_norm": 0.08623594228687413, "learning_rate": 0.00018521825540531218, "loss": 0.8452, "step": 54870 }, { "epoch": 0.963500061447708, "grad_norm": 0.06923387287572728, "learning_rate": 0.00018521239701418113, "loss": 0.8472, "step": 54880 }, { "epoch": 0.9636756263277094, "grad_norm": 0.06073217248891654, "learning_rate": 0.0001852065375560629, "loss": 0.845, "step": 54890 }, { "epoch": 0.9638511912077108, "grad_norm": 0.05559230449661111, "learning_rate": 0.0001852006770310317, "loss": 0.85, "step": 54900 }, { "epoch": 0.9640267560877123, "grad_norm": 0.06243009981511851, "learning_rate": 0.00018519481543916177, "loss": 0.8555, "step": 54910 }, { "epoch": 0.9642023209677136, "grad_norm": 0.05890306446116319, "learning_rate": 0.0001851889527805274, "loss": 0.8508, "step": 54920 }, { "epoch": 0.964377885847715, "grad_norm": 0.05611702798284687, "learning_rate": 0.00018518308905520286, "loss": 0.8486, "step": 54930 }, { "epoch": 0.9645534507277165, "grad_norm": 0.05844910132407351, "learning_rate": 0.00018517722426326244, "loss": 0.8448, "step": 54940 }, { "epoch": 0.9647290156077178, "grad_norm": 0.059334716382845584, "learning_rate": 0.0001851713584047805, "loss": 0.8483, "step": 54950 }, { "epoch": 0.9649045804877192, "grad_norm": 0.0502403404457577, "learning_rate": 0.00018516549147983127, "loss": 0.8514, "step": 54960 }, { "epoch": 0.9650801453677207, "grad_norm": 0.0651734444949658, "learning_rate": 0.00018515962348848914, "loss": 0.8562, "step": 54970 }, { "epoch": 0.965255710247722, "grad_norm": 0.05078363686571217, "learning_rate": 0.00018515375443082849, "loss": 0.8537, "step": 54980 }, { "epoch": 0.9654312751277234, "grad_norm": 0.07005084369373878, "learning_rate": 0.0001851478843069236, "loss": 0.8538, "step": 54990 }, { "epoch": 0.9656068400077249, "grad_norm": 0.04930638134099371, "learning_rate": 0.0001851420131168489, "loss": 0.8525, "step": 55000 }, { "epoch": 0.9657824048877263, "grad_norm": 0.0571127276703054, "learning_rate": 0.00018513614086067878, "loss": 0.8436, "step": 55010 }, { "epoch": 0.9659579697677276, "grad_norm": 0.057161985037682224, "learning_rate": 0.0001851302675384876, "loss": 0.8509, "step": 55020 }, { "epoch": 0.9661335346477291, "grad_norm": 0.09423229017482862, "learning_rate": 0.00018512439315034984, "loss": 0.8579, "step": 55030 }, { "epoch": 0.9663090995277305, "grad_norm": 0.05799227052258098, "learning_rate": 0.0001851185176963399, "loss": 0.8504, "step": 55040 }, { "epoch": 0.9664846644077318, "grad_norm": 0.0631095850311377, "learning_rate": 0.0001851126411765322, "loss": 0.8493, "step": 55050 }, { "epoch": 0.9666602292877333, "grad_norm": 0.05709538869992791, "learning_rate": 0.0001851067635910012, "loss": 0.8514, "step": 55060 }, { "epoch": 0.9668357941677347, "grad_norm": 0.0674495336783126, "learning_rate": 0.0001851008849398214, "loss": 0.8505, "step": 55070 }, { "epoch": 0.967011359047736, "grad_norm": 0.05229014339336654, "learning_rate": 0.00018509500522306732, "loss": 0.8551, "step": 55080 }, { "epoch": 0.9671869239277375, "grad_norm": 0.07509144036773872, "learning_rate": 0.00018508912444081336, "loss": 0.8456, "step": 55090 }, { "epoch": 0.9673624888077389, "grad_norm": 0.0729085304413324, "learning_rate": 0.00018508324259313406, "loss": 0.8525, "step": 55100 }, { "epoch": 0.9675380536877403, "grad_norm": 0.047221179838645544, "learning_rate": 0.00018507735968010397, "loss": 0.8563, "step": 55110 }, { "epoch": 0.9677136185677417, "grad_norm": 0.07097445793519247, "learning_rate": 0.00018507147570179765, "loss": 0.8493, "step": 55120 }, { "epoch": 0.9678891834477431, "grad_norm": 0.0588745210348203, "learning_rate": 0.00018506559065828958, "loss": 0.8546, "step": 55130 }, { "epoch": 0.9680647483277445, "grad_norm": 0.08921540229976893, "learning_rate": 0.00018505970454965444, "loss": 0.8566, "step": 55140 }, { "epoch": 0.968240313207746, "grad_norm": 0.055123595532949335, "learning_rate": 0.00018505381737596665, "loss": 0.8492, "step": 55150 }, { "epoch": 0.9684158780877473, "grad_norm": 0.049647610382968284, "learning_rate": 0.0001850479291373009, "loss": 0.8516, "step": 55160 }, { "epoch": 0.9685914429677487, "grad_norm": 0.05680136393105547, "learning_rate": 0.00018504203983373183, "loss": 0.8532, "step": 55170 }, { "epoch": 0.9687670078477502, "grad_norm": 0.049758433550845256, "learning_rate": 0.00018503614946533396, "loss": 0.8514, "step": 55180 }, { "epoch": 0.9689425727277515, "grad_norm": 0.07338301561691404, "learning_rate": 0.000185030258032182, "loss": 0.8478, "step": 55190 }, { "epoch": 0.9691181376077529, "grad_norm": 0.0745504486611267, "learning_rate": 0.00018502436553435053, "loss": 0.844, "step": 55200 }, { "epoch": 0.9692937024877544, "grad_norm": 0.05432648325120253, "learning_rate": 0.00018501847197191428, "loss": 0.8458, "step": 55210 }, { "epoch": 0.9694692673677557, "grad_norm": 0.0600427835850341, "learning_rate": 0.0001850125773449479, "loss": 0.8453, "step": 55220 }, { "epoch": 0.9696448322477571, "grad_norm": 0.07028897274735327, "learning_rate": 0.000185006681653526, "loss": 0.8594, "step": 55230 }, { "epoch": 0.9698203971277586, "grad_norm": 0.070069624349586, "learning_rate": 0.00018500078489772342, "loss": 0.8567, "step": 55240 }, { "epoch": 0.96999596200776, "grad_norm": 0.07385128169616227, "learning_rate": 0.00018499488707761474, "loss": 0.8549, "step": 55250 }, { "epoch": 0.9701715268877614, "grad_norm": 0.055309723706483994, "learning_rate": 0.00018498898819327477, "loss": 0.8519, "step": 55260 }, { "epoch": 0.9703470917677628, "grad_norm": 0.05481935191645343, "learning_rate": 0.0001849830882447782, "loss": 0.8504, "step": 55270 }, { "epoch": 0.9705226566477642, "grad_norm": 0.05511184607440725, "learning_rate": 0.00018497718723219987, "loss": 0.856, "step": 55280 }, { "epoch": 0.9706982215277656, "grad_norm": 0.05043693779624383, "learning_rate": 0.00018497128515561443, "loss": 0.8526, "step": 55290 }, { "epoch": 0.970873786407767, "grad_norm": 0.05993918914221316, "learning_rate": 0.0001849653820150967, "loss": 0.8471, "step": 55300 }, { "epoch": 0.9710493512877684, "grad_norm": 0.06443026945290094, "learning_rate": 0.00018495947781072152, "loss": 0.8517, "step": 55310 }, { "epoch": 0.9712249161677698, "grad_norm": 0.056166968211571626, "learning_rate": 0.00018495357254256364, "loss": 0.8532, "step": 55320 }, { "epoch": 0.9714004810477712, "grad_norm": 0.06223028828059734, "learning_rate": 0.0001849476662106979, "loss": 0.8508, "step": 55330 }, { "epoch": 0.9715760459277726, "grad_norm": 0.06451481454764314, "learning_rate": 0.00018494175881519917, "loss": 0.8559, "step": 55340 }, { "epoch": 0.9717516108077741, "grad_norm": 0.07198287370170346, "learning_rate": 0.00018493585035614228, "loss": 0.8461, "step": 55350 }, { "epoch": 0.9719271756877754, "grad_norm": 0.06517862268917182, "learning_rate": 0.00018492994083360207, "loss": 0.851, "step": 55360 }, { "epoch": 0.9721027405677768, "grad_norm": 0.09044334137889143, "learning_rate": 0.0001849240302476534, "loss": 0.8492, "step": 55370 }, { "epoch": 0.9722783054477783, "grad_norm": 0.0791707695685372, "learning_rate": 0.00018491811859837123, "loss": 0.8461, "step": 55380 }, { "epoch": 0.9724538703277796, "grad_norm": 0.047997155773097606, "learning_rate": 0.00018491220588583035, "loss": 0.8539, "step": 55390 }, { "epoch": 0.972629435207781, "grad_norm": 0.05769530954061769, "learning_rate": 0.00018490629211010578, "loss": 0.8504, "step": 55400 }, { "epoch": 0.9728050000877825, "grad_norm": 0.03992966713341037, "learning_rate": 0.0001849003772712724, "loss": 0.8554, "step": 55410 }, { "epoch": 0.9729805649677838, "grad_norm": 0.07676516011113174, "learning_rate": 0.00018489446136940515, "loss": 0.8482, "step": 55420 }, { "epoch": 0.9731561298477852, "grad_norm": 0.057535122047676315, "learning_rate": 0.00018488854440457903, "loss": 0.8501, "step": 55430 }, { "epoch": 0.9733316947277867, "grad_norm": 0.06485475917239733, "learning_rate": 0.00018488262637686895, "loss": 0.8487, "step": 55440 }, { "epoch": 0.9735072596077881, "grad_norm": 0.05134617149605119, "learning_rate": 0.00018487670728634993, "loss": 0.8583, "step": 55450 }, { "epoch": 0.9736828244877894, "grad_norm": 0.06144904273514751, "learning_rate": 0.0001848707871330969, "loss": 0.8526, "step": 55460 }, { "epoch": 0.9738583893677909, "grad_norm": 0.057796119617351975, "learning_rate": 0.00018486486591718499, "loss": 0.8529, "step": 55470 }, { "epoch": 0.9740339542477923, "grad_norm": 0.08998231739668633, "learning_rate": 0.00018485894363868914, "loss": 0.8451, "step": 55480 }, { "epoch": 0.9742095191277936, "grad_norm": 0.06694759636276848, "learning_rate": 0.0001848530202976844, "loss": 0.8485, "step": 55490 }, { "epoch": 0.9743850840077951, "grad_norm": 0.06417931726286148, "learning_rate": 0.00018484709589424583, "loss": 0.8492, "step": 55500 }, { "epoch": 0.9745606488877965, "grad_norm": 0.06659707210848947, "learning_rate": 0.00018484117042844847, "loss": 0.8598, "step": 55510 }, { "epoch": 0.9747362137677978, "grad_norm": 0.07563730617461892, "learning_rate": 0.0001848352439003674, "loss": 0.8524, "step": 55520 }, { "epoch": 0.9749117786477993, "grad_norm": 0.07155945178052073, "learning_rate": 0.00018482931631007773, "loss": 0.8516, "step": 55530 }, { "epoch": 0.9750873435278007, "grad_norm": 0.049655900102784555, "learning_rate": 0.00018482338765765457, "loss": 0.856, "step": 55540 }, { "epoch": 0.975262908407802, "grad_norm": 0.0609501190736301, "learning_rate": 0.000184817457943173, "loss": 0.8488, "step": 55550 }, { "epoch": 0.9754384732878035, "grad_norm": 0.0735315568179351, "learning_rate": 0.0001848115271667082, "loss": 0.856, "step": 55560 }, { "epoch": 0.9756140381678049, "grad_norm": 0.05443047942757224, "learning_rate": 0.00018480559532833528, "loss": 0.8544, "step": 55570 }, { "epoch": 0.9757896030478063, "grad_norm": 0.06752363635914012, "learning_rate": 0.00018479966242812942, "loss": 0.8461, "step": 55580 }, { "epoch": 0.9759651679278077, "grad_norm": 0.05849240156022589, "learning_rate": 0.00018479372846616577, "loss": 0.8535, "step": 55590 }, { "epoch": 0.9761407328078091, "grad_norm": 0.06901549899903517, "learning_rate": 0.00018478779344251954, "loss": 0.845, "step": 55600 }, { "epoch": 0.9763162976878106, "grad_norm": 0.07548094580868875, "learning_rate": 0.00018478185735726585, "loss": 0.8494, "step": 55610 }, { "epoch": 0.976491862567812, "grad_norm": 0.09708198272709694, "learning_rate": 0.00018477592021048006, "loss": 0.8458, "step": 55620 }, { "epoch": 0.9766674274478133, "grad_norm": 0.0752112981656823, "learning_rate": 0.0001847699820022372, "loss": 0.8484, "step": 55630 }, { "epoch": 0.9768429923278148, "grad_norm": 0.056159828413710054, "learning_rate": 0.00018476404273261266, "loss": 0.8502, "step": 55640 }, { "epoch": 0.9770185572078162, "grad_norm": 0.068034387498023, "learning_rate": 0.00018475810240168168, "loss": 0.8497, "step": 55650 }, { "epoch": 0.9771941220878175, "grad_norm": 0.06044543016760586, "learning_rate": 0.00018475216100951946, "loss": 0.8509, "step": 55660 }, { "epoch": 0.977369686967819, "grad_norm": 0.07512595841475327, "learning_rate": 0.0001847462185562013, "loss": 0.8508, "step": 55670 }, { "epoch": 0.9775452518478204, "grad_norm": 0.06228754319453959, "learning_rate": 0.0001847402750418025, "loss": 0.8582, "step": 55680 }, { "epoch": 0.9777208167278217, "grad_norm": 0.05807298330666178, "learning_rate": 0.0001847343304663984, "loss": 0.8448, "step": 55690 }, { "epoch": 0.9778963816078232, "grad_norm": 0.05948352216811074, "learning_rate": 0.00018472838483006424, "loss": 0.8501, "step": 55700 }, { "epoch": 0.9780719464878246, "grad_norm": 0.050300660156148466, "learning_rate": 0.00018472243813287548, "loss": 0.8509, "step": 55710 }, { "epoch": 0.978247511367826, "grad_norm": 0.06682234113413782, "learning_rate": 0.0001847164903749073, "loss": 0.8596, "step": 55720 }, { "epoch": 0.9784230762478274, "grad_norm": 0.06597529322468294, "learning_rate": 0.00018471054155623512, "loss": 0.8538, "step": 55730 }, { "epoch": 0.9785986411278288, "grad_norm": 0.062095505587333275, "learning_rate": 0.00018470459167693442, "loss": 0.8544, "step": 55740 }, { "epoch": 0.9787742060078302, "grad_norm": 0.06281225941136828, "learning_rate": 0.00018469864073708046, "loss": 0.8445, "step": 55750 }, { "epoch": 0.9789497708878316, "grad_norm": 0.05208378120539585, "learning_rate": 0.00018469268873674868, "loss": 0.8525, "step": 55760 }, { "epoch": 0.979125335767833, "grad_norm": 0.08056171820162913, "learning_rate": 0.0001846867356760145, "loss": 0.8524, "step": 55770 }, { "epoch": 0.9793009006478344, "grad_norm": 0.06694171628558333, "learning_rate": 0.00018468078155495335, "loss": 0.8526, "step": 55780 }, { "epoch": 0.9794764655278358, "grad_norm": 0.06323177497721924, "learning_rate": 0.00018467482637364065, "loss": 0.8531, "step": 55790 }, { "epoch": 0.9796520304078372, "grad_norm": 0.05646302783777308, "learning_rate": 0.00018466887013215185, "loss": 0.8505, "step": 55800 }, { "epoch": 0.9798275952878386, "grad_norm": 0.0617915315689957, "learning_rate": 0.00018466291283056246, "loss": 0.8565, "step": 55810 }, { "epoch": 0.9800031601678401, "grad_norm": 0.06764278207616155, "learning_rate": 0.00018465695446894792, "loss": 0.8555, "step": 55820 }, { "epoch": 0.9801787250478414, "grad_norm": 0.0602906141468306, "learning_rate": 0.0001846509950473837, "loss": 0.8496, "step": 55830 }, { "epoch": 0.9803542899278428, "grad_norm": 0.06281601296602188, "learning_rate": 0.00018464503456594537, "loss": 0.8456, "step": 55840 }, { "epoch": 0.9805298548078443, "grad_norm": 0.06402301636234486, "learning_rate": 0.00018463907302470844, "loss": 0.857, "step": 55850 }, { "epoch": 0.9807054196878456, "grad_norm": 0.07276058733980836, "learning_rate": 0.0001846331104237484, "loss": 0.858, "step": 55860 }, { "epoch": 0.980880984567847, "grad_norm": 0.0579619233657369, "learning_rate": 0.00018462714676314082, "loss": 0.8562, "step": 55870 }, { "epoch": 0.9810565494478485, "grad_norm": 0.07557107832416061, "learning_rate": 0.0001846211820429613, "loss": 0.8475, "step": 55880 }, { "epoch": 0.9812321143278498, "grad_norm": 0.07079371601885488, "learning_rate": 0.00018461521626328534, "loss": 0.8467, "step": 55890 }, { "epoch": 0.9814076792078512, "grad_norm": 0.07008710048741984, "learning_rate": 0.00018460924942418855, "loss": 0.8543, "step": 55900 }, { "epoch": 0.9815832440878527, "grad_norm": 0.05294574699444911, "learning_rate": 0.0001846032815257466, "loss": 0.8559, "step": 55910 }, { "epoch": 0.9817588089678541, "grad_norm": 0.06899721980444608, "learning_rate": 0.000184597312568035, "loss": 0.8522, "step": 55920 }, { "epoch": 0.9819343738478554, "grad_norm": 0.06754631031773804, "learning_rate": 0.00018459134255112945, "loss": 0.8504, "step": 55930 }, { "epoch": 0.9821099387278569, "grad_norm": 0.057313839564732724, "learning_rate": 0.0001845853714751056, "loss": 0.8501, "step": 55940 }, { "epoch": 0.9822855036078583, "grad_norm": 0.07127588726819924, "learning_rate": 0.00018457939934003902, "loss": 0.8515, "step": 55950 }, { "epoch": 0.9824610684878596, "grad_norm": 0.06125392138724135, "learning_rate": 0.00018457342614600547, "loss": 0.8507, "step": 55960 }, { "epoch": 0.9826366333678611, "grad_norm": 0.07463119017923493, "learning_rate": 0.0001845674518930806, "loss": 0.8517, "step": 55970 }, { "epoch": 0.9828121982478625, "grad_norm": 0.07366189327701678, "learning_rate": 0.00018456147658134004, "loss": 0.8512, "step": 55980 }, { "epoch": 0.982987763127864, "grad_norm": 0.08153105381161743, "learning_rate": 0.00018455550021085964, "loss": 0.844, "step": 55990 }, { "epoch": 0.9831633280078653, "grad_norm": 0.11749778103322601, "learning_rate": 0.00018454952278171497, "loss": 0.8591, "step": 56000 }, { "epoch": 0.9833388928878667, "grad_norm": 0.08121817076314049, "learning_rate": 0.00018454354429398186, "loss": 0.8523, "step": 56010 }, { "epoch": 0.9835144577678682, "grad_norm": 0.07079463129232026, "learning_rate": 0.00018453756474773604, "loss": 0.8525, "step": 56020 }, { "epoch": 0.9836900226478695, "grad_norm": 0.08404419664192106, "learning_rate": 0.00018453158414305325, "loss": 0.8564, "step": 56030 }, { "epoch": 0.9838655875278709, "grad_norm": 0.07247140318245286, "learning_rate": 0.0001845256024800093, "loss": 0.8465, "step": 56040 }, { "epoch": 0.9840411524078724, "grad_norm": 0.08300618838905575, "learning_rate": 0.00018451961975867996, "loss": 0.8529, "step": 56050 }, { "epoch": 0.9842167172878737, "grad_norm": 0.06315732463736477, "learning_rate": 0.00018451363597914101, "loss": 0.8454, "step": 56060 }, { "epoch": 0.9843922821678751, "grad_norm": 0.05988911987197718, "learning_rate": 0.00018450765114146826, "loss": 0.8431, "step": 56070 }, { "epoch": 0.9845678470478766, "grad_norm": 0.06688925501363228, "learning_rate": 0.00018450166524573762, "loss": 0.845, "step": 56080 }, { "epoch": 0.984743411927878, "grad_norm": 0.049573475626486764, "learning_rate": 0.00018449567829202488, "loss": 0.8531, "step": 56090 }, { "epoch": 0.9849189768078793, "grad_norm": 0.05787847886658095, "learning_rate": 0.00018448969028040583, "loss": 0.8509, "step": 56100 }, { "epoch": 0.9850945416878808, "grad_norm": 0.0565327661723438, "learning_rate": 0.00018448370121095645, "loss": 0.8509, "step": 56110 }, { "epoch": 0.9852701065678822, "grad_norm": 0.05670153000843854, "learning_rate": 0.0001844777110837526, "loss": 0.8487, "step": 56120 }, { "epoch": 0.9854456714478835, "grad_norm": 0.0652228840376706, "learning_rate": 0.0001844717198988701, "loss": 0.847, "step": 56130 }, { "epoch": 0.985621236327885, "grad_norm": 0.05013247029384433, "learning_rate": 0.00018446572765638494, "loss": 0.858, "step": 56140 }, { "epoch": 0.9857968012078864, "grad_norm": 0.047333554475636315, "learning_rate": 0.00018445973435637304, "loss": 0.8431, "step": 56150 }, { "epoch": 0.9859723660878877, "grad_norm": 0.0494248327352652, "learning_rate": 0.00018445373999891027, "loss": 0.8546, "step": 56160 }, { "epoch": 0.9861479309678892, "grad_norm": 0.07182256827166052, "learning_rate": 0.00018444774458407264, "loss": 0.8477, "step": 56170 }, { "epoch": 0.9863234958478906, "grad_norm": 0.05194279666653261, "learning_rate": 0.00018444174811193607, "loss": 0.8547, "step": 56180 }, { "epoch": 0.986499060727892, "grad_norm": 0.04932393231451287, "learning_rate": 0.00018443575058257658, "loss": 0.8511, "step": 56190 }, { "epoch": 0.9866746256078934, "grad_norm": 0.060348620870103266, "learning_rate": 0.00018442975199607016, "loss": 0.8443, "step": 56200 }, { "epoch": 0.9868501904878948, "grad_norm": 0.06042712555335187, "learning_rate": 0.00018442375235249274, "loss": 0.8527, "step": 56210 }, { "epoch": 0.9870257553678962, "grad_norm": 0.056347009801086395, "learning_rate": 0.00018441775165192043, "loss": 0.8549, "step": 56220 }, { "epoch": 0.9872013202478976, "grad_norm": 0.06728413538086979, "learning_rate": 0.0001844117498944292, "loss": 0.8475, "step": 56230 }, { "epoch": 0.987376885127899, "grad_norm": 0.053558459664971955, "learning_rate": 0.00018440574708009511, "loss": 0.8576, "step": 56240 }, { "epoch": 0.9875524500079004, "grad_norm": 0.059613266674837434, "learning_rate": 0.00018439974320899423, "loss": 0.8533, "step": 56250 }, { "epoch": 0.9877280148879019, "grad_norm": 0.07330765755748743, "learning_rate": 0.00018439373828120262, "loss": 0.8455, "step": 56260 }, { "epoch": 0.9879035797679032, "grad_norm": 0.06545137923766547, "learning_rate": 0.00018438773229679636, "loss": 0.8521, "step": 56270 }, { "epoch": 0.9880791446479046, "grad_norm": 0.0681413508826837, "learning_rate": 0.00018438172525585159, "loss": 0.8476, "step": 56280 }, { "epoch": 0.9882547095279061, "grad_norm": 0.054582687974819806, "learning_rate": 0.00018437571715844433, "loss": 0.851, "step": 56290 }, { "epoch": 0.9884302744079074, "grad_norm": 0.0787039512134953, "learning_rate": 0.00018436970800465077, "loss": 0.8565, "step": 56300 }, { "epoch": 0.9886058392879088, "grad_norm": 0.06792270035908422, "learning_rate": 0.00018436369779454702, "loss": 0.8443, "step": 56310 }, { "epoch": 0.9887814041679103, "grad_norm": 0.056951868090402744, "learning_rate": 0.00018435768652820926, "loss": 0.8462, "step": 56320 }, { "epoch": 0.9889569690479116, "grad_norm": 0.07037454172714246, "learning_rate": 0.0001843516742057136, "loss": 0.8492, "step": 56330 }, { "epoch": 0.9891325339279131, "grad_norm": 0.06976826237475375, "learning_rate": 0.00018434566082713632, "loss": 0.8516, "step": 56340 }, { "epoch": 0.9893080988079145, "grad_norm": 0.10132524158005675, "learning_rate": 0.0001843396463925535, "loss": 0.8567, "step": 56350 }, { "epoch": 0.9894836636879158, "grad_norm": 0.06174736600354629, "learning_rate": 0.0001843336309020414, "loss": 0.8481, "step": 56360 }, { "epoch": 0.9896592285679173, "grad_norm": 0.08478511145093871, "learning_rate": 0.00018432761435567625, "loss": 0.8485, "step": 56370 }, { "epoch": 0.9898347934479187, "grad_norm": 0.05609521311932699, "learning_rate": 0.00018432159675353421, "loss": 0.8488, "step": 56380 }, { "epoch": 0.9900103583279201, "grad_norm": 0.06905812628860275, "learning_rate": 0.0001843155780956916, "loss": 0.8467, "step": 56390 }, { "epoch": 0.9901859232079215, "grad_norm": 0.06674349190671393, "learning_rate": 0.0001843095583822246, "loss": 0.8531, "step": 56400 }, { "epoch": 0.9903614880879229, "grad_norm": 0.06104107820700924, "learning_rate": 0.0001843035376132096, "loss": 0.8486, "step": 56410 }, { "epoch": 0.9905370529679243, "grad_norm": 0.05970292129141999, "learning_rate": 0.00018429751578872275, "loss": 0.8535, "step": 56420 }, { "epoch": 0.9907126178479257, "grad_norm": 0.058194744225966696, "learning_rate": 0.00018429149290884048, "loss": 0.8501, "step": 56430 }, { "epoch": 0.9908881827279271, "grad_norm": 0.06849019508211356, "learning_rate": 0.000184285468973639, "loss": 0.8464, "step": 56440 }, { "epoch": 0.9910637476079285, "grad_norm": 0.04606362680712893, "learning_rate": 0.00018427944398319464, "loss": 0.8452, "step": 56450 }, { "epoch": 0.99123931248793, "grad_norm": 0.06368143886102015, "learning_rate": 0.00018427341793758378, "loss": 0.851, "step": 56460 }, { "epoch": 0.9914148773679313, "grad_norm": 0.09604199952190666, "learning_rate": 0.00018426739083688277, "loss": 0.8481, "step": 56470 }, { "epoch": 0.9915904422479327, "grad_norm": 0.06421954434437761, "learning_rate": 0.00018426136268116797, "loss": 0.846, "step": 56480 }, { "epoch": 0.9917660071279342, "grad_norm": 0.08337476812986204, "learning_rate": 0.00018425533347051572, "loss": 0.8477, "step": 56490 }, { "epoch": 0.9919415720079355, "grad_norm": 0.05520254983394486, "learning_rate": 0.0001842493032050024, "loss": 0.8527, "step": 56500 }, { "epoch": 0.9921171368879369, "grad_norm": 0.09347147933649798, "learning_rate": 0.0001842432718847045, "loss": 0.8524, "step": 56510 }, { "epoch": 0.9922927017679384, "grad_norm": 0.07481419935521504, "learning_rate": 0.0001842372395096984, "loss": 0.8475, "step": 56520 }, { "epoch": 0.9924682666479397, "grad_norm": 0.06542498658361165, "learning_rate": 0.00018423120608006052, "loss": 0.8549, "step": 56530 }, { "epoch": 0.9926438315279411, "grad_norm": 0.05573432662120896, "learning_rate": 0.0001842251715958673, "loss": 0.8516, "step": 56540 }, { "epoch": 0.9928193964079426, "grad_norm": 0.07036756734125, "learning_rate": 0.00018421913605719522, "loss": 0.8438, "step": 56550 }, { "epoch": 0.992994961287944, "grad_norm": 0.05619597439454494, "learning_rate": 0.00018421309946412074, "loss": 0.8543, "step": 56560 }, { "epoch": 0.9931705261679453, "grad_norm": 0.0649945714224265, "learning_rate": 0.00018420706181672031, "loss": 0.8502, "step": 56570 }, { "epoch": 0.9933460910479468, "grad_norm": 0.07061338610675556, "learning_rate": 0.00018420102311507048, "loss": 0.8543, "step": 56580 }, { "epoch": 0.9935216559279482, "grad_norm": 0.05051706594064909, "learning_rate": 0.00018419498335924775, "loss": 0.8408, "step": 56590 }, { "epoch": 0.9936972208079495, "grad_norm": 0.09295975910918253, "learning_rate": 0.00018418894254932863, "loss": 0.8484, "step": 56600 }, { "epoch": 0.993872785687951, "grad_norm": 0.05899447647305215, "learning_rate": 0.00018418290068538965, "loss": 0.8572, "step": 56610 }, { "epoch": 0.9940483505679524, "grad_norm": 0.05268594729296781, "learning_rate": 0.00018417685776750742, "loss": 0.8443, "step": 56620 }, { "epoch": 0.9942239154479537, "grad_norm": 0.06315275451873188, "learning_rate": 0.0001841708137957584, "loss": 0.8519, "step": 56630 }, { "epoch": 0.9943994803279552, "grad_norm": 0.05647097474578115, "learning_rate": 0.00018416476877021926, "loss": 0.8461, "step": 56640 }, { "epoch": 0.9945750452079566, "grad_norm": 0.06620048972860083, "learning_rate": 0.00018415872269096658, "loss": 0.8536, "step": 56650 }, { "epoch": 0.994750610087958, "grad_norm": 0.04766645031854222, "learning_rate": 0.0001841526755580769, "loss": 0.8472, "step": 56660 }, { "epoch": 0.9949261749679594, "grad_norm": 0.07517769258811252, "learning_rate": 0.00018414662737162695, "loss": 0.8509, "step": 56670 }, { "epoch": 0.9951017398479608, "grad_norm": 0.06786891644699258, "learning_rate": 0.00018414057813169324, "loss": 0.8533, "step": 56680 }, { "epoch": 0.9952773047279622, "grad_norm": 0.05362721374942034, "learning_rate": 0.00018413452783835248, "loss": 0.8534, "step": 56690 }, { "epoch": 0.9954528696079636, "grad_norm": 0.042971255971688375, "learning_rate": 0.00018412847649168133, "loss": 0.8427, "step": 56700 }, { "epoch": 0.995628434487965, "grad_norm": 0.0652211044519968, "learning_rate": 0.00018412242409175647, "loss": 0.8417, "step": 56710 }, { "epoch": 0.9958039993679665, "grad_norm": 0.057365994117766286, "learning_rate": 0.0001841163706386545, "loss": 0.8581, "step": 56720 }, { "epoch": 0.9959795642479679, "grad_norm": 0.07947493003352432, "learning_rate": 0.00018411031613245227, "loss": 0.8607, "step": 56730 }, { "epoch": 0.9961551291279692, "grad_norm": 0.06411656465794277, "learning_rate": 0.00018410426057322634, "loss": 0.8538, "step": 56740 }, { "epoch": 0.9963306940079707, "grad_norm": 0.06188887999291985, "learning_rate": 0.00018409820396105353, "loss": 0.8576, "step": 56750 }, { "epoch": 0.9965062588879721, "grad_norm": 0.05357051268500942, "learning_rate": 0.00018409214629601056, "loss": 0.8506, "step": 56760 }, { "epoch": 0.9966818237679734, "grad_norm": 0.09782788521548237, "learning_rate": 0.00018408608757817415, "loss": 0.8592, "step": 56770 }, { "epoch": 0.9968573886479749, "grad_norm": 0.06770789578374656, "learning_rate": 0.0001840800278076211, "loss": 0.8542, "step": 56780 }, { "epoch": 0.9970329535279763, "grad_norm": 0.07003504006397199, "learning_rate": 0.00018407396698442817, "loss": 0.8452, "step": 56790 }, { "epoch": 0.9972085184079776, "grad_norm": 0.07361054241525321, "learning_rate": 0.00018406790510867217, "loss": 0.8472, "step": 56800 }, { "epoch": 0.9973840832879791, "grad_norm": 0.05888558212884699, "learning_rate": 0.00018406184218042987, "loss": 0.8499, "step": 56810 }, { "epoch": 0.9975596481679805, "grad_norm": 0.05470370472504471, "learning_rate": 0.00018405577819977814, "loss": 0.8466, "step": 56820 }, { "epoch": 0.9977352130479819, "grad_norm": 0.059073399945017376, "learning_rate": 0.00018404971316679375, "loss": 0.8539, "step": 56830 }, { "epoch": 0.9979107779279833, "grad_norm": 0.06191039384330902, "learning_rate": 0.0001840436470815536, "loss": 0.8511, "step": 56840 }, { "epoch": 0.9980863428079847, "grad_norm": 0.057650708398638235, "learning_rate": 0.00018403757994413453, "loss": 0.8495, "step": 56850 }, { "epoch": 0.9982619076879861, "grad_norm": 0.10941963656602653, "learning_rate": 0.00018403151175461338, "loss": 0.8454, "step": 56860 }, { "epoch": 0.9984374725679875, "grad_norm": 0.0520239432752115, "learning_rate": 0.0001840254425130671, "loss": 0.8553, "step": 56870 }, { "epoch": 0.9986130374479889, "grad_norm": 0.06854825422446295, "learning_rate": 0.00018401937221957252, "loss": 0.8448, "step": 56880 }, { "epoch": 0.9987886023279903, "grad_norm": 0.08650789307158079, "learning_rate": 0.00018401330087420659, "loss": 0.8452, "step": 56890 }, { "epoch": 0.9989641672079917, "grad_norm": 0.0737723513275301, "learning_rate": 0.00018400722847704626, "loss": 0.8511, "step": 56900 }, { "epoch": 0.9991397320879931, "grad_norm": 0.06481005578264762, "learning_rate": 0.0001840011550281684, "loss": 0.8506, "step": 56910 }, { "epoch": 0.9993152969679945, "grad_norm": 0.07010584482412711, "learning_rate": 0.00018399508052765, "loss": 0.8511, "step": 56920 }, { "epoch": 0.999490861847996, "grad_norm": 0.05847443267702205, "learning_rate": 0.00018398900497556804, "loss": 0.8477, "step": 56930 }, { "epoch": 0.9996664267279973, "grad_norm": 0.07484785510055697, "learning_rate": 0.00018398292837199946, "loss": 0.8526, "step": 56940 }, { "epoch": 0.9998419916079987, "grad_norm": 0.04767599237392813, "learning_rate": 0.00018397685071702133, "loss": 0.8498, "step": 56950 }, { "epoch": 1.000017556488, "grad_norm": 0.06918703166494092, "learning_rate": 0.00018397077201071056, "loss": 0.8581, "step": 56960 }, { "epoch": 1.0001931213680015, "grad_norm": 0.06473117854452935, "learning_rate": 0.0001839646922531442, "loss": 0.8505, "step": 56970 }, { "epoch": 1.000368686248003, "grad_norm": 0.05680574688835367, "learning_rate": 0.00018395861144439928, "loss": 0.8445, "step": 56980 }, { "epoch": 1.0005442511280043, "grad_norm": 0.06484087942590894, "learning_rate": 0.00018395252958455286, "loss": 0.851, "step": 56990 }, { "epoch": 1.0007198160080057, "grad_norm": 0.05203039329303289, "learning_rate": 0.00018394644667368203, "loss": 0.8556, "step": 57000 }, { "epoch": 1.0008953808880072, "grad_norm": 0.05403870907851163, "learning_rate": 0.00018394036271186378, "loss": 0.8478, "step": 57010 }, { "epoch": 1.0010709457680085, "grad_norm": 0.08053702986131214, "learning_rate": 0.00018393427769917525, "loss": 0.8586, "step": 57020 }, { "epoch": 1.00124651064801, "grad_norm": 0.05937897307065659, "learning_rate": 0.00018392819163569354, "loss": 0.859, "step": 57030 }, { "epoch": 1.0014220755280114, "grad_norm": 0.09016981125393919, "learning_rate": 0.00018392210452149575, "loss": 0.8473, "step": 57040 }, { "epoch": 1.0015976404080127, "grad_norm": 0.058537850534267896, "learning_rate": 0.00018391601635665898, "loss": 0.8606, "step": 57050 }, { "epoch": 1.0017732052880142, "grad_norm": 0.06643852484811973, "learning_rate": 0.00018390992714126042, "loss": 0.8476, "step": 57060 }, { "epoch": 1.0019487701680156, "grad_norm": 0.06052649080438298, "learning_rate": 0.0001839038368753772, "loss": 0.8558, "step": 57070 }, { "epoch": 1.002124335048017, "grad_norm": 0.052190441801842014, "learning_rate": 0.00018389774555908642, "loss": 0.8544, "step": 57080 }, { "epoch": 1.0022998999280184, "grad_norm": 0.08415895354995483, "learning_rate": 0.00018389165319246536, "loss": 0.8469, "step": 57090 }, { "epoch": 1.0024754648080199, "grad_norm": 0.09632799307428629, "learning_rate": 0.00018388555977559117, "loss": 0.8523, "step": 57100 }, { "epoch": 1.0026510296880211, "grad_norm": 0.06392317303066189, "learning_rate": 0.00018387946530854107, "loss": 0.846, "step": 57110 }, { "epoch": 1.0028265945680226, "grad_norm": 0.06347544817496636, "learning_rate": 0.0001838733697913922, "loss": 0.8507, "step": 57120 }, { "epoch": 1.003002159448024, "grad_norm": 0.06965695771566174, "learning_rate": 0.0001838672732242219, "loss": 0.8573, "step": 57130 }, { "epoch": 1.0031777243280253, "grad_norm": 0.05952840127679832, "learning_rate": 0.00018386117560710737, "loss": 0.851, "step": 57140 }, { "epoch": 1.0033532892080268, "grad_norm": 0.0633402594274687, "learning_rate": 0.00018385507694012584, "loss": 0.8536, "step": 57150 }, { "epoch": 1.0035288540880283, "grad_norm": 0.08668062767967304, "learning_rate": 0.0001838489772233546, "loss": 0.8536, "step": 57160 }, { "epoch": 1.0037044189680295, "grad_norm": 0.06663811168778713, "learning_rate": 0.00018384287645687098, "loss": 0.8539, "step": 57170 }, { "epoch": 1.003879983848031, "grad_norm": 0.06462644160106652, "learning_rate": 0.00018383677464075223, "loss": 0.8485, "step": 57180 }, { "epoch": 1.0040555487280325, "grad_norm": 0.07479265302812643, "learning_rate": 0.00018383067177507565, "loss": 0.8518, "step": 57190 }, { "epoch": 1.0042311136080337, "grad_norm": 0.06858342921845773, "learning_rate": 0.00018382456785991856, "loss": 0.8568, "step": 57200 }, { "epoch": 1.0044066784880352, "grad_norm": 0.046862929655702074, "learning_rate": 0.00018381846289535836, "loss": 0.8489, "step": 57210 }, { "epoch": 1.0045822433680367, "grad_norm": 0.071670336053507, "learning_rate": 0.00018381235688147237, "loss": 0.8503, "step": 57220 }, { "epoch": 1.004757808248038, "grad_norm": 0.0578126278571733, "learning_rate": 0.00018380624981833793, "loss": 0.8577, "step": 57230 }, { "epoch": 1.0049333731280394, "grad_norm": 0.061097077520016155, "learning_rate": 0.00018380014170603243, "loss": 0.8535, "step": 57240 }, { "epoch": 1.005108938008041, "grad_norm": 0.07323172040660607, "learning_rate": 0.00018379403254463328, "loss": 0.8488, "step": 57250 }, { "epoch": 1.0052845028880424, "grad_norm": 0.052290820354976446, "learning_rate": 0.00018378792233421788, "loss": 0.8452, "step": 57260 }, { "epoch": 1.0054600677680436, "grad_norm": 0.09005740821223741, "learning_rate": 0.0001837818110748636, "loss": 0.843, "step": 57270 }, { "epoch": 1.0056356326480451, "grad_norm": 0.06213299299029477, "learning_rate": 0.00018377569876664793, "loss": 0.8542, "step": 57280 }, { "epoch": 1.0058111975280466, "grad_norm": 0.05617461346883926, "learning_rate": 0.00018376958540964827, "loss": 0.8583, "step": 57290 }, { "epoch": 1.0059867624080479, "grad_norm": 0.06148942962679741, "learning_rate": 0.00018376347100394213, "loss": 0.8453, "step": 57300 }, { "epoch": 1.0061623272880493, "grad_norm": 0.05474982171720353, "learning_rate": 0.00018375735554960692, "loss": 0.8489, "step": 57310 }, { "epoch": 1.0063378921680508, "grad_norm": 0.056435434873019746, "learning_rate": 0.00018375123904672023, "loss": 0.844, "step": 57320 }, { "epoch": 1.006513457048052, "grad_norm": 0.05010497524289432, "learning_rate": 0.0001837451214953594, "loss": 0.8537, "step": 57330 }, { "epoch": 1.0066890219280535, "grad_norm": 0.04836249912443953, "learning_rate": 0.0001837390028956021, "loss": 0.8527, "step": 57340 }, { "epoch": 1.006864586808055, "grad_norm": 0.08644161719035882, "learning_rate": 0.00018373288324752572, "loss": 0.8539, "step": 57350 }, { "epoch": 1.0070401516880563, "grad_norm": 0.09109686916040854, "learning_rate": 0.00018372676255120786, "loss": 0.8473, "step": 57360 }, { "epoch": 1.0072157165680578, "grad_norm": 0.06927543306610721, "learning_rate": 0.00018372064080672605, "loss": 0.8467, "step": 57370 }, { "epoch": 1.0073912814480592, "grad_norm": 0.06497491409870639, "learning_rate": 0.00018371451801415788, "loss": 0.849, "step": 57380 }, { "epoch": 1.0075668463280605, "grad_norm": 0.08051949144149584, "learning_rate": 0.0001837083941735809, "loss": 0.8448, "step": 57390 }, { "epoch": 1.007742411208062, "grad_norm": 0.05035970355877847, "learning_rate": 0.00018370226928507275, "loss": 0.8484, "step": 57400 }, { "epoch": 1.0079179760880634, "grad_norm": 0.06262873140271641, "learning_rate": 0.000183696143348711, "loss": 0.8549, "step": 57410 }, { "epoch": 1.0080935409680647, "grad_norm": 0.07088847967570565, "learning_rate": 0.00018369001636457324, "loss": 0.8447, "step": 57420 }, { "epoch": 1.0082691058480662, "grad_norm": 0.06412971045479393, "learning_rate": 0.00018368388833273712, "loss": 0.8451, "step": 57430 }, { "epoch": 1.0084446707280676, "grad_norm": 0.04731865317324123, "learning_rate": 0.00018367775925328032, "loss": 0.8549, "step": 57440 }, { "epoch": 1.008620235608069, "grad_norm": 0.05165386632671862, "learning_rate": 0.00018367162912628045, "loss": 0.8495, "step": 57450 }, { "epoch": 1.0087958004880704, "grad_norm": 0.07100144485889673, "learning_rate": 0.0001836654979518152, "loss": 0.8523, "step": 57460 }, { "epoch": 1.0089713653680719, "grad_norm": 0.054626970270764, "learning_rate": 0.00018365936572996224, "loss": 0.8424, "step": 57470 }, { "epoch": 1.0091469302480731, "grad_norm": 0.05375977869279286, "learning_rate": 0.00018365323246079927, "loss": 0.8577, "step": 57480 }, { "epoch": 1.0093224951280746, "grad_norm": 0.0633282068624186, "learning_rate": 0.000183647098144404, "loss": 0.8534, "step": 57490 }, { "epoch": 1.009498060008076, "grad_norm": 0.05454254288494738, "learning_rate": 0.00018364096278085414, "loss": 0.8552, "step": 57500 }, { "epoch": 1.0096736248880773, "grad_norm": 0.0808417513824939, "learning_rate": 0.00018363482637022752, "loss": 0.849, "step": 57510 }, { "epoch": 1.0098491897680788, "grad_norm": 0.08147130000666776, "learning_rate": 0.00018362868891260174, "loss": 0.8585, "step": 57520 }, { "epoch": 1.0100247546480803, "grad_norm": 0.04893691527522899, "learning_rate": 0.00018362255040805468, "loss": 0.8511, "step": 57530 }, { "epoch": 1.0102003195280815, "grad_norm": 0.0567422245883815, "learning_rate": 0.000183616410856664, "loss": 0.8494, "step": 57540 }, { "epoch": 1.010375884408083, "grad_norm": 0.04703852926032065, "learning_rate": 0.00018361027025850762, "loss": 0.8543, "step": 57550 }, { "epoch": 1.0105514492880845, "grad_norm": 0.08210293026836615, "learning_rate": 0.00018360412861366328, "loss": 0.8496, "step": 57560 }, { "epoch": 1.0107270141680857, "grad_norm": 0.07525212557536587, "learning_rate": 0.0001835979859222088, "loss": 0.8539, "step": 57570 }, { "epoch": 1.0109025790480872, "grad_norm": 0.05547109797913806, "learning_rate": 0.000183591842184222, "loss": 0.8555, "step": 57580 }, { "epoch": 1.0110781439280887, "grad_norm": 0.07657341377584027, "learning_rate": 0.00018358569739978075, "loss": 0.8601, "step": 57590 }, { "epoch": 1.01125370880809, "grad_norm": 0.05401781220301768, "learning_rate": 0.00018357955156896285, "loss": 0.8501, "step": 57600 }, { "epoch": 1.0114292736880914, "grad_norm": 0.0541357695761068, "learning_rate": 0.00018357340469184625, "loss": 0.8524, "step": 57610 }, { "epoch": 1.011604838568093, "grad_norm": 0.061427691261375504, "learning_rate": 0.00018356725676850876, "loss": 0.8464, "step": 57620 }, { "epoch": 1.0117804034480942, "grad_norm": 0.05986837127715629, "learning_rate": 0.00018356110779902828, "loss": 0.8561, "step": 57630 }, { "epoch": 1.0119559683280956, "grad_norm": 0.0742637065059885, "learning_rate": 0.00018355495778348279, "loss": 0.8498, "step": 57640 }, { "epoch": 1.0121315332080971, "grad_norm": 0.06785985693396415, "learning_rate": 0.0001835488067219501, "loss": 0.855, "step": 57650 }, { "epoch": 1.0123070980880984, "grad_norm": 0.05278582111640081, "learning_rate": 0.00018354265461450827, "loss": 0.8528, "step": 57660 }, { "epoch": 1.0124826629680999, "grad_norm": 0.07342483253433364, "learning_rate": 0.00018353650146123515, "loss": 0.8467, "step": 57670 }, { "epoch": 1.0126582278481013, "grad_norm": 0.05807942433717392, "learning_rate": 0.0001835303472622088, "loss": 0.8594, "step": 57680 }, { "epoch": 1.0128337927281026, "grad_norm": 0.056591697621899806, "learning_rate": 0.00018352419201750708, "loss": 0.8501, "step": 57690 }, { "epoch": 1.013009357608104, "grad_norm": 0.05640676716357161, "learning_rate": 0.00018351803572720803, "loss": 0.8518, "step": 57700 }, { "epoch": 1.0131849224881055, "grad_norm": 0.0689348591332243, "learning_rate": 0.00018351187839138963, "loss": 0.8418, "step": 57710 }, { "epoch": 1.0133604873681068, "grad_norm": 0.076867691032374, "learning_rate": 0.00018350572001012998, "loss": 0.8519, "step": 57720 }, { "epoch": 1.0135360522481083, "grad_norm": 0.0729871281239425, "learning_rate": 0.00018349956058350702, "loss": 0.844, "step": 57730 }, { "epoch": 1.0137116171281098, "grad_norm": 0.05826216539829217, "learning_rate": 0.00018349340011159881, "loss": 0.8492, "step": 57740 }, { "epoch": 1.013887182008111, "grad_norm": 0.06688871895810904, "learning_rate": 0.0001834872385944834, "loss": 0.8413, "step": 57750 }, { "epoch": 1.0140627468881125, "grad_norm": 0.07285981851454083, "learning_rate": 0.0001834810760322389, "loss": 0.852, "step": 57760 }, { "epoch": 1.014238311768114, "grad_norm": 0.04928503133337006, "learning_rate": 0.0001834749124249433, "loss": 0.8533, "step": 57770 }, { "epoch": 1.0144138766481152, "grad_norm": 0.08917165412230857, "learning_rate": 0.0001834687477726748, "loss": 0.8502, "step": 57780 }, { "epoch": 1.0145894415281167, "grad_norm": 0.04826672034599844, "learning_rate": 0.00018346258207551145, "loss": 0.8467, "step": 57790 }, { "epoch": 1.0147650064081182, "grad_norm": 0.07794241527952483, "learning_rate": 0.0001834564153335314, "loss": 0.8546, "step": 57800 }, { "epoch": 1.0149405712881194, "grad_norm": 0.06603981556994042, "learning_rate": 0.00018345024754681276, "loss": 0.8502, "step": 57810 }, { "epoch": 1.015116136168121, "grad_norm": 0.050124003714769716, "learning_rate": 0.00018344407871543367, "loss": 0.8572, "step": 57820 }, { "epoch": 1.0152917010481224, "grad_norm": 0.07128740496516624, "learning_rate": 0.0001834379088394723, "loss": 0.8581, "step": 57830 }, { "epoch": 1.0154672659281236, "grad_norm": 0.059345822393300336, "learning_rate": 0.00018343173791900684, "loss": 0.844, "step": 57840 }, { "epoch": 1.0156428308081251, "grad_norm": 0.10336341097415111, "learning_rate": 0.0001834255659541154, "loss": 0.8474, "step": 57850 }, { "epoch": 1.0158183956881266, "grad_norm": 0.07664044718176762, "learning_rate": 0.00018341939294487633, "loss": 0.8511, "step": 57860 }, { "epoch": 1.0159939605681279, "grad_norm": 0.06834540617730213, "learning_rate": 0.0001834132188913677, "loss": 0.8602, "step": 57870 }, { "epoch": 1.0161695254481293, "grad_norm": 0.07175827046058275, "learning_rate": 0.00018340704379366784, "loss": 0.8413, "step": 57880 }, { "epoch": 1.0163450903281308, "grad_norm": 0.06567441791757853, "learning_rate": 0.00018340086765185492, "loss": 0.8501, "step": 57890 }, { "epoch": 1.016520655208132, "grad_norm": 0.04870018871827384, "learning_rate": 0.0001833946904660072, "loss": 0.8461, "step": 57900 }, { "epoch": 1.0166962200881335, "grad_norm": 0.07386237666885187, "learning_rate": 0.00018338851223620296, "loss": 0.8477, "step": 57910 }, { "epoch": 1.016871784968135, "grad_norm": 0.056876958540823225, "learning_rate": 0.0001833823329625205, "loss": 0.8499, "step": 57920 }, { "epoch": 1.0170473498481363, "grad_norm": 0.08787092546782102, "learning_rate": 0.0001833761526450381, "loss": 0.8583, "step": 57930 }, { "epoch": 1.0172229147281378, "grad_norm": 0.05715370023223837, "learning_rate": 0.00018336997128383403, "loss": 0.8541, "step": 57940 }, { "epoch": 1.0173984796081392, "grad_norm": 0.07200939597576614, "learning_rate": 0.00018336378887898666, "loss": 0.8453, "step": 57950 }, { "epoch": 1.0175740444881405, "grad_norm": 0.053764345622941495, "learning_rate": 0.00018335760543057429, "loss": 0.8404, "step": 57960 }, { "epoch": 1.017749609368142, "grad_norm": 0.06337248060573794, "learning_rate": 0.00018335142093867525, "loss": 0.8551, "step": 57970 }, { "epoch": 1.0179251742481434, "grad_norm": 0.06600124555816574, "learning_rate": 0.00018334523540336796, "loss": 0.8517, "step": 57980 }, { "epoch": 1.018100739128145, "grad_norm": 0.06953754689030639, "learning_rate": 0.00018333904882473074, "loss": 0.8548, "step": 57990 }, { "epoch": 1.0182763040081462, "grad_norm": 0.04981018855039167, "learning_rate": 0.000183332861202842, "loss": 0.8419, "step": 58000 }, { "epoch": 1.0184518688881476, "grad_norm": 0.0652896179346685, "learning_rate": 0.00018332667253778012, "loss": 0.8508, "step": 58010 }, { "epoch": 1.0186274337681491, "grad_norm": 0.05761211532589112, "learning_rate": 0.00018332048282962353, "loss": 0.8524, "step": 58020 }, { "epoch": 1.0188029986481504, "grad_norm": 0.08594992803423747, "learning_rate": 0.00018331429207845066, "loss": 0.8494, "step": 58030 }, { "epoch": 1.0189785635281519, "grad_norm": 0.059993933926691244, "learning_rate": 0.00018330810028433991, "loss": 0.8532, "step": 58040 }, { "epoch": 1.0191541284081533, "grad_norm": 0.09186723659652489, "learning_rate": 0.00018330190744736977, "loss": 0.8537, "step": 58050 }, { "epoch": 1.0193296932881546, "grad_norm": 0.07865287952263571, "learning_rate": 0.00018329571356761866, "loss": 0.8593, "step": 58060 }, { "epoch": 1.019505258168156, "grad_norm": 0.07047124366077023, "learning_rate": 0.00018328951864516512, "loss": 0.8456, "step": 58070 }, { "epoch": 1.0196808230481575, "grad_norm": 0.07841198988301461, "learning_rate": 0.0001832833226800876, "loss": 0.8514, "step": 58080 }, { "epoch": 1.0198563879281588, "grad_norm": 0.09499799047554278, "learning_rate": 0.00018327712567246462, "loss": 0.8577, "step": 58090 }, { "epoch": 1.0200319528081603, "grad_norm": 0.05308330059468653, "learning_rate": 0.00018327092762237466, "loss": 0.8549, "step": 58100 }, { "epoch": 1.0202075176881618, "grad_norm": 0.06660770225212095, "learning_rate": 0.0001832647285298963, "loss": 0.8545, "step": 58110 }, { "epoch": 1.020383082568163, "grad_norm": 0.062378153627281864, "learning_rate": 0.00018325852839510808, "loss": 0.8504, "step": 58120 }, { "epoch": 1.0205586474481645, "grad_norm": 0.04788546901018771, "learning_rate": 0.00018325232721808852, "loss": 0.8428, "step": 58130 }, { "epoch": 1.020734212328166, "grad_norm": 0.0705226793792809, "learning_rate": 0.0001832461249989162, "loss": 0.8547, "step": 58140 }, { "epoch": 1.0209097772081672, "grad_norm": 0.06192753789002412, "learning_rate": 0.00018323992173766975, "loss": 0.852, "step": 58150 }, { "epoch": 1.0210853420881687, "grad_norm": 0.06102591086115269, "learning_rate": 0.0001832337174344277, "loss": 0.8576, "step": 58160 }, { "epoch": 1.0212609069681702, "grad_norm": 0.06295166686828899, "learning_rate": 0.0001832275120892687, "loss": 0.8547, "step": 58170 }, { "epoch": 1.0214364718481714, "grad_norm": 0.07093651005780331, "learning_rate": 0.00018322130570227134, "loss": 0.8569, "step": 58180 }, { "epoch": 1.021612036728173, "grad_norm": 0.05713028662149447, "learning_rate": 0.00018321509827351427, "loss": 0.8484, "step": 58190 }, { "epoch": 1.0217876016081744, "grad_norm": 0.07693604598521016, "learning_rate": 0.00018320888980307622, "loss": 0.8544, "step": 58200 }, { "epoch": 1.0219631664881756, "grad_norm": 0.05192900159407114, "learning_rate": 0.00018320268029103573, "loss": 0.8525, "step": 58210 }, { "epoch": 1.0221387313681771, "grad_norm": 0.06363163802715563, "learning_rate": 0.00018319646973747152, "loss": 0.8564, "step": 58220 }, { "epoch": 1.0223142962481786, "grad_norm": 0.07238007669623532, "learning_rate": 0.0001831902581424623, "loss": 0.8525, "step": 58230 }, { "epoch": 1.0224898611281799, "grad_norm": 0.07321827542421633, "learning_rate": 0.00018318404550608677, "loss": 0.8515, "step": 58240 }, { "epoch": 1.0226654260081813, "grad_norm": 0.08254472669492502, "learning_rate": 0.00018317783182842363, "loss": 0.849, "step": 58250 }, { "epoch": 1.0228409908881828, "grad_norm": 0.0501080021301721, "learning_rate": 0.00018317161710955162, "loss": 0.8582, "step": 58260 }, { "epoch": 1.023016555768184, "grad_norm": 0.06675235332927723, "learning_rate": 0.00018316540134954946, "loss": 0.8512, "step": 58270 }, { "epoch": 1.0231921206481855, "grad_norm": 0.06738256244009651, "learning_rate": 0.0001831591845484959, "loss": 0.8415, "step": 58280 }, { "epoch": 1.023367685528187, "grad_norm": 0.09662761242424181, "learning_rate": 0.00018315296670646974, "loss": 0.8637, "step": 58290 }, { "epoch": 1.0235432504081883, "grad_norm": 0.06284790630972853, "learning_rate": 0.00018314674782354972, "loss": 0.8558, "step": 58300 }, { "epoch": 1.0237188152881898, "grad_norm": 0.06755791319114997, "learning_rate": 0.0001831405278998147, "loss": 0.8514, "step": 58310 }, { "epoch": 1.0238943801681912, "grad_norm": 0.06423521556295124, "learning_rate": 0.00018313430693534346, "loss": 0.8554, "step": 58320 }, { "epoch": 1.0240699450481925, "grad_norm": 0.05839798261831497, "learning_rate": 0.00018312808493021478, "loss": 0.8443, "step": 58330 }, { "epoch": 1.024245509928194, "grad_norm": 0.05185306284459377, "learning_rate": 0.00018312186188450754, "loss": 0.8576, "step": 58340 }, { "epoch": 1.0244210748081954, "grad_norm": 0.08794994255450836, "learning_rate": 0.00018311563779830055, "loss": 0.8487, "step": 58350 }, { "epoch": 1.0245966396881967, "grad_norm": 0.06678542137420172, "learning_rate": 0.0001831094126716727, "loss": 0.8557, "step": 58360 }, { "epoch": 1.0247722045681982, "grad_norm": 0.05752658197096915, "learning_rate": 0.00018310318650470287, "loss": 0.8478, "step": 58370 }, { "epoch": 1.0249477694481997, "grad_norm": 0.05336561239335987, "learning_rate": 0.00018309695929746993, "loss": 0.8504, "step": 58380 }, { "epoch": 1.025123334328201, "grad_norm": 0.0653095826699686, "learning_rate": 0.00018309073105005278, "loss": 0.853, "step": 58390 }, { "epoch": 1.0252988992082024, "grad_norm": 0.07088007532003532, "learning_rate": 0.0001830845017625303, "loss": 0.8511, "step": 58400 }, { "epoch": 1.0254744640882039, "grad_norm": 0.06225010916695357, "learning_rate": 0.00018307827143498152, "loss": 0.8503, "step": 58410 }, { "epoch": 1.0256500289682051, "grad_norm": 0.05851263842302583, "learning_rate": 0.00018307204006748527, "loss": 0.8538, "step": 58420 }, { "epoch": 1.0258255938482066, "grad_norm": 0.0455350449293496, "learning_rate": 0.0001830658076601205, "loss": 0.8486, "step": 58430 }, { "epoch": 1.026001158728208, "grad_norm": 0.06889666850906602, "learning_rate": 0.0001830595742129663, "loss": 0.858, "step": 58440 }, { "epoch": 1.0261767236082093, "grad_norm": 0.05406070504910497, "learning_rate": 0.00018305333972610152, "loss": 0.8433, "step": 58450 }, { "epoch": 1.0263522884882108, "grad_norm": 0.06646126789983098, "learning_rate": 0.00018304710419960523, "loss": 0.8499, "step": 58460 }, { "epoch": 1.0265278533682123, "grad_norm": 0.05558721030509597, "learning_rate": 0.00018304086763355637, "loss": 0.8524, "step": 58470 }, { "epoch": 1.0267034182482135, "grad_norm": 0.058733889263952684, "learning_rate": 0.00018303463002803403, "loss": 0.8553, "step": 58480 }, { "epoch": 1.026878983128215, "grad_norm": 0.06225521108194014, "learning_rate": 0.00018302839138311717, "loss": 0.8517, "step": 58490 }, { "epoch": 1.0270545480082165, "grad_norm": 0.05676633906632645, "learning_rate": 0.0001830221516988849, "loss": 0.8512, "step": 58500 }, { "epoch": 1.0272301128882178, "grad_norm": 0.06188892767658575, "learning_rate": 0.00018301591097541627, "loss": 0.8558, "step": 58510 }, { "epoch": 1.0274056777682192, "grad_norm": 0.09088045287313301, "learning_rate": 0.00018300966921279025, "loss": 0.8551, "step": 58520 }, { "epoch": 1.0275812426482207, "grad_norm": 0.06335220246267807, "learning_rate": 0.00018300342641108604, "loss": 0.8441, "step": 58530 }, { "epoch": 1.027756807528222, "grad_norm": 0.06428816447566754, "learning_rate": 0.00018299718257038276, "loss": 0.8421, "step": 58540 }, { "epoch": 1.0279323724082234, "grad_norm": 0.06925074819337303, "learning_rate": 0.0001829909376907594, "loss": 0.8453, "step": 58550 }, { "epoch": 1.028107937288225, "grad_norm": 0.06387005737583633, "learning_rate": 0.00018298469177229512, "loss": 0.8528, "step": 58560 }, { "epoch": 1.0282835021682262, "grad_norm": 0.10172365344309713, "learning_rate": 0.00018297844481506917, "loss": 0.8447, "step": 58570 }, { "epoch": 1.0284590670482276, "grad_norm": 0.07149841377044328, "learning_rate": 0.00018297219681916055, "loss": 0.8586, "step": 58580 }, { "epoch": 1.0286346319282291, "grad_norm": 0.07472584738402481, "learning_rate": 0.00018296594778464847, "loss": 0.8533, "step": 58590 }, { "epoch": 1.0288101968082304, "grad_norm": 0.052000393300650975, "learning_rate": 0.0001829596977116122, "loss": 0.8467, "step": 58600 }, { "epoch": 1.0289857616882319, "grad_norm": 0.06855359931435703, "learning_rate": 0.00018295344660013081, "loss": 0.8511, "step": 58610 }, { "epoch": 1.0291613265682333, "grad_norm": 0.05464095678388631, "learning_rate": 0.00018294719445028352, "loss": 0.8488, "step": 58620 }, { "epoch": 1.0293368914482346, "grad_norm": 0.0844166638275016, "learning_rate": 0.0001829409412621496, "loss": 0.8619, "step": 58630 }, { "epoch": 1.029512456328236, "grad_norm": 0.057903772648458855, "learning_rate": 0.00018293468703580823, "loss": 0.8572, "step": 58640 }, { "epoch": 1.0296880212082375, "grad_norm": 0.058665558485607686, "learning_rate": 0.00018292843177133872, "loss": 0.8457, "step": 58650 }, { "epoch": 1.0298635860882388, "grad_norm": 0.06144688219378745, "learning_rate": 0.00018292217546882021, "loss": 0.8555, "step": 58660 }, { "epoch": 1.0300391509682403, "grad_norm": 0.05957869877002116, "learning_rate": 0.0001829159181283321, "loss": 0.8424, "step": 58670 }, { "epoch": 1.0302147158482418, "grad_norm": 0.07160555239836215, "learning_rate": 0.00018290965974995354, "loss": 0.8511, "step": 58680 }, { "epoch": 1.030390280728243, "grad_norm": 0.07016293469240979, "learning_rate": 0.00018290340033376396, "loss": 0.8572, "step": 58690 }, { "epoch": 1.0305658456082445, "grad_norm": 0.06605535971517287, "learning_rate": 0.00018289713987984258, "loss": 0.8498, "step": 58700 }, { "epoch": 1.030741410488246, "grad_norm": 0.06075683839084408, "learning_rate": 0.00018289087838826876, "loss": 0.8544, "step": 58710 }, { "epoch": 1.0309169753682474, "grad_norm": 0.06142870691664489, "learning_rate": 0.00018288461585912178, "loss": 0.8516, "step": 58720 }, { "epoch": 1.0310925402482487, "grad_norm": 0.05119327211567094, "learning_rate": 0.00018287835229248103, "loss": 0.8616, "step": 58730 }, { "epoch": 1.0312681051282502, "grad_norm": 0.07024997157113695, "learning_rate": 0.00018287208768842585, "loss": 0.8553, "step": 58740 }, { "epoch": 1.0314436700082517, "grad_norm": 0.056671990618684664, "learning_rate": 0.00018286582204703566, "loss": 0.8502, "step": 58750 }, { "epoch": 1.031619234888253, "grad_norm": 0.06241984767141441, "learning_rate": 0.0001828595553683898, "loss": 0.8535, "step": 58760 }, { "epoch": 1.0317947997682544, "grad_norm": 0.048390680865320494, "learning_rate": 0.00018285328765256772, "loss": 0.8465, "step": 58770 }, { "epoch": 1.0319703646482559, "grad_norm": 0.04764545418986805, "learning_rate": 0.00018284701889964875, "loss": 0.8447, "step": 58780 }, { "epoch": 1.0321459295282571, "grad_norm": 0.06851520235169642, "learning_rate": 0.0001828407491097124, "loss": 0.8403, "step": 58790 }, { "epoch": 1.0323214944082586, "grad_norm": 0.0771587622264935, "learning_rate": 0.00018283447828283802, "loss": 0.8494, "step": 58800 }, { "epoch": 1.03249705928826, "grad_norm": 0.06858580505860112, "learning_rate": 0.00018282820641910518, "loss": 0.8433, "step": 58810 }, { "epoch": 1.0326726241682613, "grad_norm": 0.05424672555433141, "learning_rate": 0.00018282193351859327, "loss": 0.8473, "step": 58820 }, { "epoch": 1.0328481890482628, "grad_norm": 0.06443954198974967, "learning_rate": 0.00018281565958138177, "loss": 0.852, "step": 58830 }, { "epoch": 1.0330237539282643, "grad_norm": 0.05247772383166401, "learning_rate": 0.00018280938460755018, "loss": 0.8564, "step": 58840 }, { "epoch": 1.0331993188082655, "grad_norm": 0.07251464391914239, "learning_rate": 0.00018280310859717798, "loss": 0.8541, "step": 58850 }, { "epoch": 1.033374883688267, "grad_norm": 0.06354074757832422, "learning_rate": 0.00018279683155034474, "loss": 0.8452, "step": 58860 }, { "epoch": 1.0335504485682685, "grad_norm": 0.08892528048696376, "learning_rate": 0.00018279055346712996, "loss": 0.8537, "step": 58870 }, { "epoch": 1.0337260134482698, "grad_norm": 0.07738347543511388, "learning_rate": 0.00018278427434761325, "loss": 0.8546, "step": 58880 }, { "epoch": 1.0339015783282712, "grad_norm": 0.04352591445287215, "learning_rate": 0.00018277799419187408, "loss": 0.8568, "step": 58890 }, { "epoch": 1.0340771432082727, "grad_norm": 0.09674667311516869, "learning_rate": 0.000182771712999992, "loss": 0.8634, "step": 58900 }, { "epoch": 1.034252708088274, "grad_norm": 0.06694275351328151, "learning_rate": 0.00018276543077204668, "loss": 0.8557, "step": 58910 }, { "epoch": 1.0344282729682754, "grad_norm": 0.06078491227270534, "learning_rate": 0.0001827591475081177, "loss": 0.8532, "step": 58920 }, { "epoch": 1.034603837848277, "grad_norm": 0.05274242692029416, "learning_rate": 0.0001827528632082846, "loss": 0.8469, "step": 58930 }, { "epoch": 1.0347794027282782, "grad_norm": 0.06012145022910564, "learning_rate": 0.00018274657787262713, "loss": 0.8478, "step": 58940 }, { "epoch": 1.0349549676082797, "grad_norm": 0.0584502284113655, "learning_rate": 0.0001827402915012248, "loss": 0.8466, "step": 58950 }, { "epoch": 1.0351305324882811, "grad_norm": 0.05528668149244231, "learning_rate": 0.0001827340040941573, "loss": 0.856, "step": 58960 }, { "epoch": 1.0353060973682824, "grad_norm": 0.052555592115292475, "learning_rate": 0.00018272771565150435, "loss": 0.8534, "step": 58970 }, { "epoch": 1.0354816622482839, "grad_norm": 0.06579901303192404, "learning_rate": 0.00018272142617334556, "loss": 0.8522, "step": 58980 }, { "epoch": 1.0356572271282853, "grad_norm": 0.04569085828393595, "learning_rate": 0.00018271513565976062, "loss": 0.8581, "step": 58990 }, { "epoch": 1.0358327920082866, "grad_norm": 0.0635263080873014, "learning_rate": 0.00018270884411082926, "loss": 0.8504, "step": 59000 }, { "epoch": 1.036008356888288, "grad_norm": 0.06050521301764601, "learning_rate": 0.0001827025515266312, "loss": 0.8539, "step": 59010 }, { "epoch": 1.0361839217682896, "grad_norm": 0.06244376373378186, "learning_rate": 0.0001826962579072462, "loss": 0.8494, "step": 59020 }, { "epoch": 1.0363594866482908, "grad_norm": 0.06241958967076976, "learning_rate": 0.00018268996325275386, "loss": 0.8475, "step": 59030 }, { "epoch": 1.0365350515282923, "grad_norm": 0.05772991619075174, "learning_rate": 0.00018268366756323405, "loss": 0.844, "step": 59040 }, { "epoch": 1.0367106164082938, "grad_norm": 0.058457849006189, "learning_rate": 0.00018267737083876658, "loss": 0.8457, "step": 59050 }, { "epoch": 1.036886181288295, "grad_norm": 0.0979778695321663, "learning_rate": 0.0001826710730794311, "loss": 0.8418, "step": 59060 }, { "epoch": 1.0370617461682965, "grad_norm": 0.05744487451778929, "learning_rate": 0.0001826647742853075, "loss": 0.853, "step": 59070 }, { "epoch": 1.037237311048298, "grad_norm": 0.06819669534059113, "learning_rate": 0.00018265847445647556, "loss": 0.843, "step": 59080 }, { "epoch": 1.0374128759282992, "grad_norm": 0.07820539417656219, "learning_rate": 0.0001826521735930151, "loss": 0.8439, "step": 59090 }, { "epoch": 1.0375884408083007, "grad_norm": 0.06086369739861539, "learning_rate": 0.00018264587169500594, "loss": 0.8394, "step": 59100 }, { "epoch": 1.0377640056883022, "grad_norm": 0.064400296642715, "learning_rate": 0.00018263956876252794, "loss": 0.8506, "step": 59110 }, { "epoch": 1.0379395705683034, "grad_norm": 0.05313530874541342, "learning_rate": 0.00018263326479566095, "loss": 0.8572, "step": 59120 }, { "epoch": 1.038115135448305, "grad_norm": 0.05992661100311786, "learning_rate": 0.00018262695979448488, "loss": 0.8559, "step": 59130 }, { "epoch": 1.0382907003283064, "grad_norm": 0.08437269424706641, "learning_rate": 0.00018262065375907957, "loss": 0.8499, "step": 59140 }, { "epoch": 1.0384662652083076, "grad_norm": 0.05264483740787297, "learning_rate": 0.00018261434668952493, "loss": 0.8533, "step": 59150 }, { "epoch": 1.0386418300883091, "grad_norm": 0.05905083574324505, "learning_rate": 0.0001826080385859009, "loss": 0.8577, "step": 59160 }, { "epoch": 1.0388173949683106, "grad_norm": 0.054772455332478265, "learning_rate": 0.00018260172944828737, "loss": 0.8526, "step": 59170 }, { "epoch": 1.0389929598483119, "grad_norm": 0.06016118143089992, "learning_rate": 0.00018259541927676426, "loss": 0.8408, "step": 59180 }, { "epoch": 1.0391685247283133, "grad_norm": 0.0644477034965523, "learning_rate": 0.00018258910807141158, "loss": 0.8494, "step": 59190 }, { "epoch": 1.0393440896083148, "grad_norm": 0.06575079271226414, "learning_rate": 0.00018258279583230924, "loss": 0.8545, "step": 59200 }, { "epoch": 1.039519654488316, "grad_norm": 0.07524526199061304, "learning_rate": 0.00018257648255953725, "loss": 0.8505, "step": 59210 }, { "epoch": 1.0396952193683175, "grad_norm": 0.05251314010340045, "learning_rate": 0.00018257016825317562, "loss": 0.8511, "step": 59220 }, { "epoch": 1.039870784248319, "grad_norm": 0.06814930091776203, "learning_rate": 0.0001825638529133043, "loss": 0.8531, "step": 59230 }, { "epoch": 1.0400463491283203, "grad_norm": 0.06120418539745883, "learning_rate": 0.00018255753654000334, "loss": 0.8541, "step": 59240 }, { "epoch": 1.0402219140083218, "grad_norm": 0.06053758172941148, "learning_rate": 0.00018255121913335275, "loss": 0.8481, "step": 59250 }, { "epoch": 1.0403974788883232, "grad_norm": 0.10121029084845852, "learning_rate": 0.0001825449006934326, "loss": 0.8506, "step": 59260 }, { "epoch": 1.0405730437683245, "grad_norm": 0.05171912790477786, "learning_rate": 0.0001825385812203229, "loss": 0.8533, "step": 59270 }, { "epoch": 1.040748608648326, "grad_norm": 0.05097854868232172, "learning_rate": 0.0001825322607141038, "loss": 0.8496, "step": 59280 }, { "epoch": 1.0409241735283274, "grad_norm": 0.07669855659483728, "learning_rate": 0.0001825259391748553, "loss": 0.8543, "step": 59290 }, { "epoch": 1.0410997384083287, "grad_norm": 0.05323897996586778, "learning_rate": 0.00018251961660265752, "loss": 0.8519, "step": 59300 }, { "epoch": 1.0412753032883302, "grad_norm": 0.05443565017458132, "learning_rate": 0.00018251329299759058, "loss": 0.8507, "step": 59310 }, { "epoch": 1.0414508681683317, "grad_norm": 0.04839276308005289, "learning_rate": 0.00018250696835973458, "loss": 0.8435, "step": 59320 }, { "epoch": 1.041626433048333, "grad_norm": 0.05896201951421716, "learning_rate": 0.0001825006426891697, "loss": 0.851, "step": 59330 }, { "epoch": 1.0418019979283344, "grad_norm": 0.08124405091632159, "learning_rate": 0.00018249431598597605, "loss": 0.8568, "step": 59340 }, { "epoch": 1.0419775628083359, "grad_norm": 0.05242607236656098, "learning_rate": 0.0001824879882502338, "loss": 0.8483, "step": 59350 }, { "epoch": 1.0421531276883371, "grad_norm": 0.06871032736531749, "learning_rate": 0.00018248165948202313, "loss": 0.8507, "step": 59360 }, { "epoch": 1.0423286925683386, "grad_norm": 0.05385815706271565, "learning_rate": 0.00018247532968142417, "loss": 0.8528, "step": 59370 }, { "epoch": 1.04250425744834, "grad_norm": 0.05688091285725989, "learning_rate": 0.00018246899884851722, "loss": 0.8513, "step": 59380 }, { "epoch": 1.0426798223283413, "grad_norm": 0.049730424089249846, "learning_rate": 0.00018246266698338244, "loss": 0.8499, "step": 59390 }, { "epoch": 1.0428553872083428, "grad_norm": 0.055180171093295725, "learning_rate": 0.00018245633408610001, "loss": 0.851, "step": 59400 }, { "epoch": 1.0430309520883443, "grad_norm": 0.05913238731567488, "learning_rate": 0.00018245000015675025, "loss": 0.8534, "step": 59410 }, { "epoch": 1.0432065169683455, "grad_norm": 0.07159077583307875, "learning_rate": 0.00018244366519541336, "loss": 0.859, "step": 59420 }, { "epoch": 1.043382081848347, "grad_norm": 0.09137084928239794, "learning_rate": 0.00018243732920216965, "loss": 0.8372, "step": 59430 }, { "epoch": 1.0435576467283485, "grad_norm": 0.06852631703451934, "learning_rate": 0.00018243099217709935, "loss": 0.8494, "step": 59440 }, { "epoch": 1.04373321160835, "grad_norm": 0.05496300132348518, "learning_rate": 0.00018242465412028283, "loss": 0.8422, "step": 59450 }, { "epoch": 1.0439087764883512, "grad_norm": 0.05102458539377945, "learning_rate": 0.00018241831503180026, "loss": 0.8447, "step": 59460 }, { "epoch": 1.0440843413683527, "grad_norm": 0.07852256726401179, "learning_rate": 0.00018241197491173207, "loss": 0.8623, "step": 59470 }, { "epoch": 1.0442599062483542, "grad_norm": 0.04706346516004175, "learning_rate": 0.00018240563376015854, "loss": 0.8592, "step": 59480 }, { "epoch": 1.0444354711283554, "grad_norm": 0.061106763728070716, "learning_rate": 0.00018239929157716004, "loss": 0.8543, "step": 59490 }, { "epoch": 1.044611036008357, "grad_norm": 0.06823834680905128, "learning_rate": 0.00018239294836281693, "loss": 0.8451, "step": 59500 }, { "epoch": 1.0447866008883584, "grad_norm": 0.06370364436502943, "learning_rate": 0.00018238660411720953, "loss": 0.8547, "step": 59510 }, { "epoch": 1.0449621657683597, "grad_norm": 0.05359411386460144, "learning_rate": 0.00018238025884041828, "loss": 0.8539, "step": 59520 }, { "epoch": 1.0451377306483611, "grad_norm": 0.055879442258361625, "learning_rate": 0.00018237391253252355, "loss": 0.8493, "step": 59530 }, { "epoch": 1.0453132955283626, "grad_norm": 0.0789864512313926, "learning_rate": 0.00018236756519360577, "loss": 0.8499, "step": 59540 }, { "epoch": 1.0454888604083639, "grad_norm": 0.05817144788801788, "learning_rate": 0.00018236121682374532, "loss": 0.8494, "step": 59550 }, { "epoch": 1.0456644252883653, "grad_norm": 0.05033704422719863, "learning_rate": 0.00018235486742302267, "loss": 0.8497, "step": 59560 }, { "epoch": 1.0458399901683668, "grad_norm": 0.05537994639202765, "learning_rate": 0.00018234851699151824, "loss": 0.8533, "step": 59570 }, { "epoch": 1.046015555048368, "grad_norm": 0.057918234934014996, "learning_rate": 0.00018234216552931253, "loss": 0.8598, "step": 59580 }, { "epoch": 1.0461911199283696, "grad_norm": 0.07136444180872714, "learning_rate": 0.000182335813036486, "loss": 0.8551, "step": 59590 }, { "epoch": 1.046366684808371, "grad_norm": 0.05747745947659825, "learning_rate": 0.00018232945951311916, "loss": 0.8538, "step": 59600 }, { "epoch": 1.0465422496883723, "grad_norm": 0.049225298744631656, "learning_rate": 0.00018232310495929242, "loss": 0.8453, "step": 59610 }, { "epoch": 1.0467178145683738, "grad_norm": 0.05530997588518064, "learning_rate": 0.0001823167493750864, "loss": 0.8587, "step": 59620 }, { "epoch": 1.0468933794483752, "grad_norm": 0.07217187317462502, "learning_rate": 0.00018231039276058155, "loss": 0.8491, "step": 59630 }, { "epoch": 1.0470689443283765, "grad_norm": 0.0625253773556848, "learning_rate": 0.00018230403511585846, "loss": 0.8516, "step": 59640 }, { "epoch": 1.047244509208378, "grad_norm": 0.0822470694815677, "learning_rate": 0.00018229767644099764, "loss": 0.8528, "step": 59650 }, { "epoch": 1.0474200740883794, "grad_norm": 0.06510461067750206, "learning_rate": 0.0001822913167360797, "loss": 0.8466, "step": 59660 }, { "epoch": 1.0475956389683807, "grad_norm": 0.08985684172088924, "learning_rate": 0.00018228495600118518, "loss": 0.8589, "step": 59670 }, { "epoch": 1.0477712038483822, "grad_norm": 0.04783142537222484, "learning_rate": 0.0001822785942363947, "loss": 0.8591, "step": 59680 }, { "epoch": 1.0479467687283837, "grad_norm": 0.05270053707749643, "learning_rate": 0.00018227223144178886, "loss": 0.8612, "step": 59690 }, { "epoch": 1.048122333608385, "grad_norm": 0.05457911472318335, "learning_rate": 0.00018226586761744827, "loss": 0.8669, "step": 59700 }, { "epoch": 1.0482978984883864, "grad_norm": 0.05690696085843239, "learning_rate": 0.00018225950276345353, "loss": 0.8484, "step": 59710 }, { "epoch": 1.0484734633683879, "grad_norm": 0.0636856163696016, "learning_rate": 0.00018225313687988536, "loss": 0.844, "step": 59720 }, { "epoch": 1.0486490282483891, "grad_norm": 0.04963609633369366, "learning_rate": 0.00018224676996682435, "loss": 0.848, "step": 59730 }, { "epoch": 1.0488245931283906, "grad_norm": 0.07716499299858307, "learning_rate": 0.00018224040202435122, "loss": 0.846, "step": 59740 }, { "epoch": 1.049000158008392, "grad_norm": 0.057951502794932344, "learning_rate": 0.0001822340330525466, "loss": 0.8455, "step": 59750 }, { "epoch": 1.0491757228883933, "grad_norm": 0.05354112765900599, "learning_rate": 0.0001822276630514912, "loss": 0.855, "step": 59760 }, { "epoch": 1.0493512877683948, "grad_norm": 0.07293368382363283, "learning_rate": 0.0001822212920212658, "loss": 0.8482, "step": 59770 }, { "epoch": 1.0495268526483963, "grad_norm": 0.05882154391330544, "learning_rate": 0.000182214919961951, "loss": 0.856, "step": 59780 }, { "epoch": 1.0497024175283975, "grad_norm": 0.08228140145249668, "learning_rate": 0.00018220854687362763, "loss": 0.8603, "step": 59790 }, { "epoch": 1.049877982408399, "grad_norm": 0.05805524415204143, "learning_rate": 0.0001822021727563764, "loss": 0.8554, "step": 59800 }, { "epoch": 1.0500535472884005, "grad_norm": 0.06778988321763342, "learning_rate": 0.0001821957976102781, "loss": 0.8506, "step": 59810 }, { "epoch": 1.0502291121684018, "grad_norm": 0.05274368356212095, "learning_rate": 0.00018218942143541348, "loss": 0.8499, "step": 59820 }, { "epoch": 1.0504046770484032, "grad_norm": 0.08661603405370949, "learning_rate": 0.0001821830442318633, "loss": 0.8447, "step": 59830 }, { "epoch": 1.0505802419284047, "grad_norm": 0.06701909689130763, "learning_rate": 0.00018217666599970838, "loss": 0.8465, "step": 59840 }, { "epoch": 1.050755806808406, "grad_norm": 0.05921063450622248, "learning_rate": 0.0001821702867390296, "loss": 0.8531, "step": 59850 }, { "epoch": 1.0509313716884074, "grad_norm": 0.07232025496837932, "learning_rate": 0.00018216390644990767, "loss": 0.8553, "step": 59860 }, { "epoch": 1.051106936568409, "grad_norm": 0.07656223442616333, "learning_rate": 0.00018215752513242353, "loss": 0.8464, "step": 59870 }, { "epoch": 1.0512825014484102, "grad_norm": 0.05985959783039181, "learning_rate": 0.000182151142786658, "loss": 0.851, "step": 59880 }, { "epoch": 1.0514580663284117, "grad_norm": 0.08479674042037046, "learning_rate": 0.00018214475941269193, "loss": 0.8548, "step": 59890 }, { "epoch": 1.0516336312084131, "grad_norm": 0.0679422083515928, "learning_rate": 0.0001821383750106062, "loss": 0.8546, "step": 59900 }, { "epoch": 1.0518091960884144, "grad_norm": 0.07177584061917873, "learning_rate": 0.00018213198958048172, "loss": 0.8521, "step": 59910 }, { "epoch": 1.0519847609684159, "grad_norm": 0.05293992787714145, "learning_rate": 0.00018212560312239938, "loss": 0.8472, "step": 59920 }, { "epoch": 1.0521603258484173, "grad_norm": 0.06802688835810954, "learning_rate": 0.0001821192156364401, "loss": 0.8476, "step": 59930 }, { "epoch": 1.0523358907284186, "grad_norm": 0.07592673890468883, "learning_rate": 0.0001821128271226848, "loss": 0.8353, "step": 59940 }, { "epoch": 1.05251145560842, "grad_norm": 0.07781454336625276, "learning_rate": 0.00018210643758121442, "loss": 0.8432, "step": 59950 }, { "epoch": 1.0526870204884216, "grad_norm": 0.07209812090622028, "learning_rate": 0.00018210004701210995, "loss": 0.8534, "step": 59960 }, { "epoch": 1.0528625853684228, "grad_norm": 0.05027436975609625, "learning_rate": 0.00018209365541545233, "loss": 0.8492, "step": 59970 }, { "epoch": 1.0530381502484243, "grad_norm": 0.07153004868270849, "learning_rate": 0.00018208726279132258, "loss": 0.8548, "step": 59980 }, { "epoch": 1.0532137151284258, "grad_norm": 0.07598934099118468, "learning_rate": 0.00018208086913980168, "loss": 0.8508, "step": 59990 }, { "epoch": 1.053389280008427, "grad_norm": 0.050766138187549284, "learning_rate": 0.0001820744744609706, "loss": 0.8551, "step": 60000 }, { "epoch": 1.0535648448884285, "grad_norm": 0.05757190959896189, "learning_rate": 0.00018206807875491038, "loss": 0.8466, "step": 60010 }, { "epoch": 1.05374040976843, "grad_norm": 0.05908967816536562, "learning_rate": 0.0001820616820217021, "loss": 0.8529, "step": 60020 }, { "epoch": 1.0539159746484312, "grad_norm": 0.07039605655883681, "learning_rate": 0.00018205528426142674, "loss": 0.848, "step": 60030 }, { "epoch": 1.0540915395284327, "grad_norm": 0.0678504296544689, "learning_rate": 0.0001820488854741654, "loss": 0.8492, "step": 60040 }, { "epoch": 1.0542671044084342, "grad_norm": 0.055385797161915364, "learning_rate": 0.00018204248565999919, "loss": 0.8565, "step": 60050 }, { "epoch": 1.0544426692884354, "grad_norm": 0.06542702497832366, "learning_rate": 0.00018203608481900913, "loss": 0.845, "step": 60060 }, { "epoch": 1.054618234168437, "grad_norm": 0.05484369393478439, "learning_rate": 0.00018202968295127634, "loss": 0.8512, "step": 60070 }, { "epoch": 1.0547937990484384, "grad_norm": 0.07042963195526668, "learning_rate": 0.00018202328005688192, "loss": 0.8489, "step": 60080 }, { "epoch": 1.0549693639284397, "grad_norm": 0.07818137728547689, "learning_rate": 0.00018201687613590703, "loss": 0.8523, "step": 60090 }, { "epoch": 1.0551449288084411, "grad_norm": 0.04587432554177762, "learning_rate": 0.0001820104711884328, "loss": 0.8499, "step": 60100 }, { "epoch": 1.0553204936884426, "grad_norm": 0.08405606948938073, "learning_rate": 0.00018200406521454035, "loss": 0.8522, "step": 60110 }, { "epoch": 1.0554960585684439, "grad_norm": 0.06246531174915386, "learning_rate": 0.0001819976582143109, "loss": 0.8441, "step": 60120 }, { "epoch": 1.0556716234484453, "grad_norm": 0.08439884181635326, "learning_rate": 0.0001819912501878256, "loss": 0.8542, "step": 60130 }, { "epoch": 1.0558471883284468, "grad_norm": 0.055823392204144014, "learning_rate": 0.0001819848411351656, "loss": 0.8539, "step": 60140 }, { "epoch": 1.056022753208448, "grad_norm": 0.06475148965465664, "learning_rate": 0.00018197843105641217, "loss": 0.8605, "step": 60150 }, { "epoch": 1.0561983180884496, "grad_norm": 0.06015893172930975, "learning_rate": 0.0001819720199516465, "loss": 0.8425, "step": 60160 }, { "epoch": 1.056373882968451, "grad_norm": 0.06495310219567273, "learning_rate": 0.0001819656078209498, "loss": 0.848, "step": 60170 }, { "epoch": 1.0565494478484525, "grad_norm": 0.097599280458904, "learning_rate": 0.00018195919466440334, "loss": 0.8518, "step": 60180 }, { "epoch": 1.0567250127284538, "grad_norm": 0.049339583596514365, "learning_rate": 0.00018195278048208836, "loss": 0.8452, "step": 60190 }, { "epoch": 1.0569005776084552, "grad_norm": 0.07490693062339321, "learning_rate": 0.00018194636527408617, "loss": 0.8441, "step": 60200 }, { "epoch": 1.0570761424884567, "grad_norm": 0.05389972674789821, "learning_rate": 0.000181939949040478, "loss": 0.8593, "step": 60210 }, { "epoch": 1.057251707368458, "grad_norm": 0.06929403383954734, "learning_rate": 0.00018193353178134515, "loss": 0.855, "step": 60220 }, { "epoch": 1.0574272722484594, "grad_norm": 0.052455375451491634, "learning_rate": 0.00018192711349676897, "loss": 0.848, "step": 60230 }, { "epoch": 1.057602837128461, "grad_norm": 0.07986870692881229, "learning_rate": 0.00018192069418683073, "loss": 0.8558, "step": 60240 }, { "epoch": 1.0577784020084622, "grad_norm": 0.058719466134070884, "learning_rate": 0.0001819142738516118, "loss": 0.8537, "step": 60250 }, { "epoch": 1.0579539668884637, "grad_norm": 0.07400472975933123, "learning_rate": 0.0001819078524911935, "loss": 0.8457, "step": 60260 }, { "epoch": 1.0581295317684651, "grad_norm": 0.06306795058200172, "learning_rate": 0.00018190143010565722, "loss": 0.8485, "step": 60270 }, { "epoch": 1.0583050966484664, "grad_norm": 0.06687926133851442, "learning_rate": 0.00018189500669508427, "loss": 0.8581, "step": 60280 }, { "epoch": 1.0584806615284679, "grad_norm": 0.05938666668551222, "learning_rate": 0.00018188858225955614, "loss": 0.8552, "step": 60290 }, { "epoch": 1.0586562264084693, "grad_norm": 0.08407928570266666, "learning_rate": 0.00018188215679915414, "loss": 0.8562, "step": 60300 }, { "epoch": 1.0588317912884706, "grad_norm": 0.06378558947347295, "learning_rate": 0.00018187573031395971, "loss": 0.8515, "step": 60310 }, { "epoch": 1.059007356168472, "grad_norm": 0.05141027868010165, "learning_rate": 0.0001818693028040543, "loss": 0.8452, "step": 60320 }, { "epoch": 1.0591829210484736, "grad_norm": 0.06030524096928478, "learning_rate": 0.0001818628742695193, "loss": 0.8482, "step": 60330 }, { "epoch": 1.0593584859284748, "grad_norm": 0.06231427601159012, "learning_rate": 0.00018185644471043622, "loss": 0.8456, "step": 60340 }, { "epoch": 1.0595340508084763, "grad_norm": 0.05836957531744594, "learning_rate": 0.00018185001412688643, "loss": 0.8509, "step": 60350 }, { "epoch": 1.0597096156884778, "grad_norm": 0.055612231426766745, "learning_rate": 0.0001818435825189515, "loss": 0.8438, "step": 60360 }, { "epoch": 1.059885180568479, "grad_norm": 0.05880828060142469, "learning_rate": 0.0001818371498867129, "loss": 0.8482, "step": 60370 }, { "epoch": 1.0600607454484805, "grad_norm": 0.06617398352244167, "learning_rate": 0.0001818307162302521, "loss": 0.8544, "step": 60380 }, { "epoch": 1.060236310328482, "grad_norm": 0.07334782301858093, "learning_rate": 0.00018182428154965064, "loss": 0.8516, "step": 60390 }, { "epoch": 1.0604118752084832, "grad_norm": 0.06203087151513314, "learning_rate": 0.00018181784584499005, "loss": 0.8482, "step": 60400 }, { "epoch": 1.0605874400884847, "grad_norm": 0.07829769669299173, "learning_rate": 0.00018181140911635186, "loss": 0.8463, "step": 60410 }, { "epoch": 1.0607630049684862, "grad_norm": 0.060207453646319915, "learning_rate": 0.00018180497136381765, "loss": 0.8572, "step": 60420 }, { "epoch": 1.0609385698484874, "grad_norm": 0.045306627153866415, "learning_rate": 0.00018179853258746891, "loss": 0.8591, "step": 60430 }, { "epoch": 1.061114134728489, "grad_norm": 0.05306353879545023, "learning_rate": 0.00018179209278738732, "loss": 0.8462, "step": 60440 }, { "epoch": 1.0612896996084904, "grad_norm": 0.12197797229015668, "learning_rate": 0.00018178565196365443, "loss": 0.8494, "step": 60450 }, { "epoch": 1.0614652644884917, "grad_norm": 0.0914520840327145, "learning_rate": 0.00018177921011635183, "loss": 0.8502, "step": 60460 }, { "epoch": 1.0616408293684931, "grad_norm": 0.1084187280511267, "learning_rate": 0.00018177276724556116, "loss": 0.8493, "step": 60470 }, { "epoch": 1.0618163942484946, "grad_norm": 0.06660244043940805, "learning_rate": 0.00018176632335136406, "loss": 0.8535, "step": 60480 }, { "epoch": 1.0619919591284959, "grad_norm": 0.0662624074786878, "learning_rate": 0.00018175987843384217, "loss": 0.8416, "step": 60490 }, { "epoch": 1.0621675240084973, "grad_norm": 0.04919604887618097, "learning_rate": 0.00018175343249307712, "loss": 0.84, "step": 60500 }, { "epoch": 1.0623430888884988, "grad_norm": 0.06353012179673118, "learning_rate": 0.0001817469855291506, "loss": 0.8433, "step": 60510 }, { "epoch": 1.0625186537685, "grad_norm": 0.05388053053894124, "learning_rate": 0.00018174053754214432, "loss": 0.8544, "step": 60520 }, { "epoch": 1.0626942186485016, "grad_norm": 0.062444667774841034, "learning_rate": 0.00018173408853213994, "loss": 0.8441, "step": 60530 }, { "epoch": 1.062869783528503, "grad_norm": 0.0693002272237464, "learning_rate": 0.0001817276384992192, "loss": 0.8509, "step": 60540 }, { "epoch": 1.0630453484085043, "grad_norm": 0.052978278271207116, "learning_rate": 0.00018172118744346383, "loss": 0.8547, "step": 60550 }, { "epoch": 1.0632209132885058, "grad_norm": 0.06742162159863273, "learning_rate": 0.0001817147353649555, "loss": 0.8525, "step": 60560 }, { "epoch": 1.0633964781685072, "grad_norm": 0.05796068900843066, "learning_rate": 0.000181708282263776, "loss": 0.8478, "step": 60570 }, { "epoch": 1.0635720430485085, "grad_norm": 0.05027505077939828, "learning_rate": 0.00018170182814000713, "loss": 0.8542, "step": 60580 }, { "epoch": 1.06374760792851, "grad_norm": 0.05247471547047542, "learning_rate": 0.00018169537299373063, "loss": 0.8594, "step": 60590 }, { "epoch": 1.0639231728085115, "grad_norm": 0.053126297978310005, "learning_rate": 0.00018168891682502828, "loss": 0.8502, "step": 60600 }, { "epoch": 1.0640987376885127, "grad_norm": 0.07271335456704277, "learning_rate": 0.0001816824596339819, "loss": 0.8448, "step": 60610 }, { "epoch": 1.0642743025685142, "grad_norm": 0.07470848397232722, "learning_rate": 0.00018167600142067326, "loss": 0.8598, "step": 60620 }, { "epoch": 1.0644498674485157, "grad_norm": 0.07024577687439819, "learning_rate": 0.00018166954218518423, "loss": 0.8556, "step": 60630 }, { "epoch": 1.064625432328517, "grad_norm": 0.05031817691434465, "learning_rate": 0.00018166308192759666, "loss": 0.8551, "step": 60640 }, { "epoch": 1.0648009972085184, "grad_norm": 0.06874554214369889, "learning_rate": 0.00018165662064799237, "loss": 0.8569, "step": 60650 }, { "epoch": 1.0649765620885199, "grad_norm": 0.07168306585984462, "learning_rate": 0.00018165015834645327, "loss": 0.8527, "step": 60660 }, { "epoch": 1.0651521269685211, "grad_norm": 0.0508421155000683, "learning_rate": 0.00018164369502306115, "loss": 0.8468, "step": 60670 }, { "epoch": 1.0653276918485226, "grad_norm": 0.06819114306500294, "learning_rate": 0.000181637230677898, "loss": 0.8534, "step": 60680 }, { "epoch": 1.065503256728524, "grad_norm": 0.07608675414719157, "learning_rate": 0.0001816307653110457, "loss": 0.8506, "step": 60690 }, { "epoch": 1.0656788216085253, "grad_norm": 0.057272379684035715, "learning_rate": 0.0001816242989225861, "loss": 0.8477, "step": 60700 }, { "epoch": 1.0658543864885268, "grad_norm": 0.05149030788212542, "learning_rate": 0.00018161783151260123, "loss": 0.8423, "step": 60710 }, { "epoch": 1.0660299513685283, "grad_norm": 0.09792147442266744, "learning_rate": 0.00018161136308117294, "loss": 0.8464, "step": 60720 }, { "epoch": 1.0662055162485296, "grad_norm": 0.07470554623841476, "learning_rate": 0.00018160489362838324, "loss": 0.8494, "step": 60730 }, { "epoch": 1.066381081128531, "grad_norm": 0.0908576937628512, "learning_rate": 0.0001815984231543141, "loss": 0.8558, "step": 60740 }, { "epoch": 1.0665566460085325, "grad_norm": 0.08325156259659625, "learning_rate": 0.0001815919516590475, "loss": 0.8493, "step": 60750 }, { "epoch": 1.0667322108885338, "grad_norm": 0.05563149390691716, "learning_rate": 0.00018158547914266543, "loss": 0.8546, "step": 60760 }, { "epoch": 1.0669077757685352, "grad_norm": 0.06460796323081544, "learning_rate": 0.00018157900560524987, "loss": 0.8534, "step": 60770 }, { "epoch": 1.0670833406485367, "grad_norm": 0.06949839961214194, "learning_rate": 0.0001815725310468829, "loss": 0.8516, "step": 60780 }, { "epoch": 1.067258905528538, "grad_norm": 0.06850697540961595, "learning_rate": 0.00018156605546764648, "loss": 0.8412, "step": 60790 }, { "epoch": 1.0674344704085394, "grad_norm": 0.05781111048324669, "learning_rate": 0.00018155957886762273, "loss": 0.8491, "step": 60800 }, { "epoch": 1.067610035288541, "grad_norm": 0.04996215523687568, "learning_rate": 0.00018155310124689367, "loss": 0.8466, "step": 60810 }, { "epoch": 1.0677856001685422, "grad_norm": 0.05180375392416595, "learning_rate": 0.00018154662260554135, "loss": 0.8561, "step": 60820 }, { "epoch": 1.0679611650485437, "grad_norm": 0.095219420188232, "learning_rate": 0.0001815401429436479, "loss": 0.8498, "step": 60830 }, { "epoch": 1.0681367299285451, "grad_norm": 0.056150316628440125, "learning_rate": 0.00018153366226129544, "loss": 0.8512, "step": 60840 }, { "epoch": 1.0683122948085466, "grad_norm": 0.05139619700470689, "learning_rate": 0.00018152718055856602, "loss": 0.8548, "step": 60850 }, { "epoch": 1.0684878596885479, "grad_norm": 0.07333992728204673, "learning_rate": 0.00018152069783554178, "loss": 0.8526, "step": 60860 }, { "epoch": 1.0686634245685493, "grad_norm": 0.059614723417672585, "learning_rate": 0.00018151421409230486, "loss": 0.8619, "step": 60870 }, { "epoch": 1.0688389894485506, "grad_norm": 0.07012723867034898, "learning_rate": 0.00018150772932893744, "loss": 0.85, "step": 60880 }, { "epoch": 1.069014554328552, "grad_norm": 0.05975806011091313, "learning_rate": 0.00018150124354552162, "loss": 0.8515, "step": 60890 }, { "epoch": 1.0691901192085536, "grad_norm": 0.08850164659756968, "learning_rate": 0.00018149475674213966, "loss": 0.8488, "step": 60900 }, { "epoch": 1.069365684088555, "grad_norm": 0.08141962949462642, "learning_rate": 0.00018148826891887368, "loss": 0.8442, "step": 60910 }, { "epoch": 1.0695412489685563, "grad_norm": 0.09051061300875776, "learning_rate": 0.00018148178007580595, "loss": 0.8517, "step": 60920 }, { "epoch": 1.0697168138485578, "grad_norm": 0.06180742592581656, "learning_rate": 0.0001814752902130186, "loss": 0.8494, "step": 60930 }, { "epoch": 1.069892378728559, "grad_norm": 0.06460404353525807, "learning_rate": 0.00018146879933059388, "loss": 0.8498, "step": 60940 }, { "epoch": 1.0700679436085605, "grad_norm": 0.06476912171484074, "learning_rate": 0.00018146230742861413, "loss": 0.8542, "step": 60950 }, { "epoch": 1.070243508488562, "grad_norm": 0.05589654670773275, "learning_rate": 0.00018145581450716147, "loss": 0.861, "step": 60960 }, { "epoch": 1.0704190733685635, "grad_norm": 0.04819824577539295, "learning_rate": 0.0001814493205663182, "loss": 0.8538, "step": 60970 }, { "epoch": 1.0705946382485647, "grad_norm": 0.056856645006226385, "learning_rate": 0.00018144282560616666, "loss": 0.8546, "step": 60980 }, { "epoch": 1.0707702031285662, "grad_norm": 0.06434151242780092, "learning_rate": 0.0001814363296267891, "loss": 0.8511, "step": 60990 }, { "epoch": 1.0709457680085677, "grad_norm": 0.04564298270777429, "learning_rate": 0.0001814298326282678, "loss": 0.8543, "step": 61000 }, { "epoch": 1.071121332888569, "grad_norm": 0.08171134886969873, "learning_rate": 0.00018142333461068514, "loss": 0.8445, "step": 61010 }, { "epoch": 1.0712968977685704, "grad_norm": 0.07590989300190656, "learning_rate": 0.0001814168355741234, "loss": 0.8472, "step": 61020 }, { "epoch": 1.0714724626485719, "grad_norm": 0.08978821174043707, "learning_rate": 0.00018141033551866492, "loss": 0.8495, "step": 61030 }, { "epoch": 1.0716480275285731, "grad_norm": 0.05370060825402343, "learning_rate": 0.00018140383444439212, "loss": 0.8457, "step": 61040 }, { "epoch": 1.0718235924085746, "grad_norm": 0.055875171107074524, "learning_rate": 0.0001813973323513873, "loss": 0.858, "step": 61050 }, { "epoch": 1.071999157288576, "grad_norm": 0.06540885319956086, "learning_rate": 0.00018139082923973287, "loss": 0.8505, "step": 61060 }, { "epoch": 1.0721747221685773, "grad_norm": 0.07289620272540193, "learning_rate": 0.00018138432510951126, "loss": 0.8547, "step": 61070 }, { "epoch": 1.0723502870485788, "grad_norm": 0.13317645156450827, "learning_rate": 0.0001813778199608048, "loss": 0.842, "step": 61080 }, { "epoch": 1.0725258519285803, "grad_norm": 0.06865671313322506, "learning_rate": 0.000181371313793696, "loss": 0.8493, "step": 61090 }, { "epoch": 1.0727014168085816, "grad_norm": 0.06159295352173081, "learning_rate": 0.00018136480660826723, "loss": 0.8513, "step": 61100 }, { "epoch": 1.072876981688583, "grad_norm": 0.07209979180890977, "learning_rate": 0.00018135829840460095, "loss": 0.851, "step": 61110 }, { "epoch": 1.0730525465685845, "grad_norm": 0.06926485955596326, "learning_rate": 0.00018135178918277962, "loss": 0.8431, "step": 61120 }, { "epoch": 1.0732281114485858, "grad_norm": 0.074794521755562, "learning_rate": 0.00018134527894288576, "loss": 0.8469, "step": 61130 }, { "epoch": 1.0734036763285872, "grad_norm": 0.07153151045719304, "learning_rate": 0.0001813387676850018, "loss": 0.8484, "step": 61140 }, { "epoch": 1.0735792412085887, "grad_norm": 0.05743113435941426, "learning_rate": 0.00018133225540921025, "loss": 0.8489, "step": 61150 }, { "epoch": 1.07375480608859, "grad_norm": 0.047601002135514825, "learning_rate": 0.00018132574211559362, "loss": 0.8568, "step": 61160 }, { "epoch": 1.0739303709685915, "grad_norm": 0.05254497100347585, "learning_rate": 0.00018131922780423448, "loss": 0.8528, "step": 61170 }, { "epoch": 1.074105935848593, "grad_norm": 0.05298067376086963, "learning_rate": 0.0001813127124752153, "loss": 0.8438, "step": 61180 }, { "epoch": 1.0742815007285942, "grad_norm": 0.08144990072131607, "learning_rate": 0.00018130619612861866, "loss": 0.8447, "step": 61190 }, { "epoch": 1.0744570656085957, "grad_norm": 0.06164539233023013, "learning_rate": 0.00018129967876452711, "loss": 0.8487, "step": 61200 }, { "epoch": 1.0746326304885971, "grad_norm": 0.06384109247229344, "learning_rate": 0.00018129316038302328, "loss": 0.8485, "step": 61210 }, { "epoch": 1.0748081953685984, "grad_norm": 0.08737237174098515, "learning_rate": 0.00018128664098418967, "loss": 0.8518, "step": 61220 }, { "epoch": 1.0749837602485999, "grad_norm": 0.08074756456707065, "learning_rate": 0.000181280120568109, "loss": 0.8553, "step": 61230 }, { "epoch": 1.0751593251286014, "grad_norm": 0.05086431173438419, "learning_rate": 0.00018127359913486376, "loss": 0.856, "step": 61240 }, { "epoch": 1.0753348900086026, "grad_norm": 0.051295258405064326, "learning_rate": 0.00018126707668453666, "loss": 0.8477, "step": 61250 }, { "epoch": 1.075510454888604, "grad_norm": 0.07422356674181094, "learning_rate": 0.00018126055321721033, "loss": 0.8506, "step": 61260 }, { "epoch": 1.0756860197686056, "grad_norm": 0.05560415885161344, "learning_rate": 0.00018125402873296736, "loss": 0.847, "step": 61270 }, { "epoch": 1.0758615846486068, "grad_norm": 0.17953808678961924, "learning_rate": 0.00018124750323189053, "loss": 0.851, "step": 61280 }, { "epoch": 1.0760371495286083, "grad_norm": 0.07414710310545625, "learning_rate": 0.0001812409767140624, "loss": 0.8538, "step": 61290 }, { "epoch": 1.0762127144086098, "grad_norm": 0.04771236858301296, "learning_rate": 0.00018123444917956573, "loss": 0.8457, "step": 61300 }, { "epoch": 1.076388279288611, "grad_norm": 0.0660846303929062, "learning_rate": 0.00018122792062848323, "loss": 0.8463, "step": 61310 }, { "epoch": 1.0765638441686125, "grad_norm": 0.0657687237376536, "learning_rate": 0.00018122139106089758, "loss": 0.8491, "step": 61320 }, { "epoch": 1.076739409048614, "grad_norm": 0.07323678821287113, "learning_rate": 0.00018121486047689153, "loss": 0.8571, "step": 61330 }, { "epoch": 1.0769149739286152, "grad_norm": 0.05885156568943065, "learning_rate": 0.00018120832887654785, "loss": 0.8522, "step": 61340 }, { "epoch": 1.0770905388086167, "grad_norm": 0.05900419537821282, "learning_rate": 0.00018120179625994925, "loss": 0.8562, "step": 61350 }, { "epoch": 1.0772661036886182, "grad_norm": 0.07137643068363302, "learning_rate": 0.00018119526262717848, "loss": 0.8434, "step": 61360 }, { "epoch": 1.0774416685686194, "grad_norm": 0.07261975578836526, "learning_rate": 0.00018118872797831846, "loss": 0.8407, "step": 61370 }, { "epoch": 1.077617233448621, "grad_norm": 0.08410748329040518, "learning_rate": 0.00018118219231345184, "loss": 0.8531, "step": 61380 }, { "epoch": 1.0777927983286224, "grad_norm": 0.06814595942692064, "learning_rate": 0.00018117565563266147, "loss": 0.8511, "step": 61390 }, { "epoch": 1.0779683632086237, "grad_norm": 0.08661230790651667, "learning_rate": 0.00018116911793603018, "loss": 0.8489, "step": 61400 }, { "epoch": 1.0781439280886251, "grad_norm": 0.06295162123219887, "learning_rate": 0.00018116257922364077, "loss": 0.8475, "step": 61410 }, { "epoch": 1.0783194929686266, "grad_norm": 0.05999521759867772, "learning_rate": 0.0001811560394955762, "loss": 0.8464, "step": 61420 }, { "epoch": 1.0784950578486279, "grad_norm": 0.06390095949545617, "learning_rate": 0.00018114949875191918, "loss": 0.8546, "step": 61430 }, { "epoch": 1.0786706227286293, "grad_norm": 0.04882657467286585, "learning_rate": 0.00018114295699275264, "loss": 0.8494, "step": 61440 }, { "epoch": 1.0788461876086308, "grad_norm": 0.06789260078382853, "learning_rate": 0.0001811364142181595, "loss": 0.8501, "step": 61450 }, { "epoch": 1.079021752488632, "grad_norm": 0.056123297734853125, "learning_rate": 0.00018112987042822267, "loss": 0.8534, "step": 61460 }, { "epoch": 1.0791973173686336, "grad_norm": 0.06993204973128615, "learning_rate": 0.00018112332562302498, "loss": 0.8461, "step": 61470 }, { "epoch": 1.079372882248635, "grad_norm": 0.05418157517624619, "learning_rate": 0.00018111677980264942, "loss": 0.8522, "step": 61480 }, { "epoch": 1.0795484471286363, "grad_norm": 0.0625156686032595, "learning_rate": 0.0001811102329671789, "loss": 0.8546, "step": 61490 }, { "epoch": 1.0797240120086378, "grad_norm": 0.048455081786156186, "learning_rate": 0.00018110368511669637, "loss": 0.8528, "step": 61500 }, { "epoch": 1.0798995768886392, "grad_norm": 0.06729999027158527, "learning_rate": 0.0001810971362512848, "loss": 0.8474, "step": 61510 }, { "epoch": 1.0800751417686405, "grad_norm": 0.07429453016513118, "learning_rate": 0.00018109058637102716, "loss": 0.8444, "step": 61520 }, { "epoch": 1.080250706648642, "grad_norm": 0.04225325298497373, "learning_rate": 0.00018108403547600644, "loss": 0.8499, "step": 61530 }, { "epoch": 1.0804262715286435, "grad_norm": 0.07383898658660464, "learning_rate": 0.00018107748356630565, "loss": 0.8527, "step": 61540 }, { "epoch": 1.0806018364086447, "grad_norm": 0.059489934447039386, "learning_rate": 0.0001810709306420078, "loss": 0.855, "step": 61550 }, { "epoch": 1.0807774012886462, "grad_norm": 0.08308266130334194, "learning_rate": 0.0001810643767031959, "loss": 0.8507, "step": 61560 }, { "epoch": 1.0809529661686477, "grad_norm": 0.05140597948719829, "learning_rate": 0.00018105782174995303, "loss": 0.8458, "step": 61570 }, { "epoch": 1.0811285310486491, "grad_norm": 0.046821462169328885, "learning_rate": 0.0001810512657823622, "loss": 0.8484, "step": 61580 }, { "epoch": 1.0813040959286504, "grad_norm": 0.059194332170896154, "learning_rate": 0.00018104470880050646, "loss": 0.8594, "step": 61590 }, { "epoch": 1.0814796608086519, "grad_norm": 0.06596832387493809, "learning_rate": 0.00018103815080446897, "loss": 0.8501, "step": 61600 }, { "epoch": 1.0816552256886531, "grad_norm": 0.04712628563900404, "learning_rate": 0.00018103159179433273, "loss": 0.8501, "step": 61610 }, { "epoch": 1.0818307905686546, "grad_norm": 0.06692339181445993, "learning_rate": 0.0001810250317701809, "loss": 0.8538, "step": 61620 }, { "epoch": 1.082006355448656, "grad_norm": 0.05658241255977982, "learning_rate": 0.00018101847073209658, "loss": 0.8462, "step": 61630 }, { "epoch": 1.0821819203286576, "grad_norm": 0.06426585325539254, "learning_rate": 0.0001810119086801629, "loss": 0.8453, "step": 61640 }, { "epoch": 1.0823574852086588, "grad_norm": 0.05087924401149423, "learning_rate": 0.000181005345614463, "loss": 0.8498, "step": 61650 }, { "epoch": 1.0825330500886603, "grad_norm": 0.056728471334558384, "learning_rate": 0.00018099878153508004, "loss": 0.8482, "step": 61660 }, { "epoch": 1.0827086149686616, "grad_norm": 0.07496254226973188, "learning_rate": 0.00018099221644209717, "loss": 0.8518, "step": 61670 }, { "epoch": 1.082884179848663, "grad_norm": 0.052243609542536436, "learning_rate": 0.00018098565033559762, "loss": 0.8541, "step": 61680 }, { "epoch": 1.0830597447286645, "grad_norm": 0.06236640631687285, "learning_rate": 0.00018097908321566456, "loss": 0.853, "step": 61690 }, { "epoch": 1.083235309608666, "grad_norm": 0.07120128971789103, "learning_rate": 0.00018097251508238113, "loss": 0.8478, "step": 61700 }, { "epoch": 1.0834108744886672, "grad_norm": 0.06106208693982511, "learning_rate": 0.00018096594593583064, "loss": 0.8511, "step": 61710 }, { "epoch": 1.0835864393686687, "grad_norm": 0.07162533577556529, "learning_rate": 0.00018095937577609632, "loss": 0.8524, "step": 61720 }, { "epoch": 1.0837620042486702, "grad_norm": 0.053087796874725646, "learning_rate": 0.00018095280460326133, "loss": 0.8495, "step": 61730 }, { "epoch": 1.0839375691286715, "grad_norm": 0.06293402332507622, "learning_rate": 0.00018094623241740902, "loss": 0.8514, "step": 61740 }, { "epoch": 1.084113134008673, "grad_norm": 0.05531033034611246, "learning_rate": 0.0001809396592186226, "loss": 0.8482, "step": 61750 }, { "epoch": 1.0842886988886744, "grad_norm": 0.0613203833061528, "learning_rate": 0.0001809330850069854, "loss": 0.8466, "step": 61760 }, { "epoch": 1.0844642637686757, "grad_norm": 0.05916560995980617, "learning_rate": 0.0001809265097825807, "loss": 0.8539, "step": 61770 }, { "epoch": 1.0846398286486771, "grad_norm": 0.05469748895178347, "learning_rate": 0.00018091993354549176, "loss": 0.8503, "step": 61780 }, { "epoch": 1.0848153935286786, "grad_norm": 0.056909404242345925, "learning_rate": 0.000180913356295802, "loss": 0.8491, "step": 61790 }, { "epoch": 1.0849909584086799, "grad_norm": 0.08317196596843635, "learning_rate": 0.00018090677803359466, "loss": 0.8557, "step": 61800 }, { "epoch": 1.0851665232886814, "grad_norm": 0.05206790648214862, "learning_rate": 0.00018090019875895312, "loss": 0.8533, "step": 61810 }, { "epoch": 1.0853420881686828, "grad_norm": 0.05528122015136484, "learning_rate": 0.00018089361847196077, "loss": 0.8533, "step": 61820 }, { "epoch": 1.085517653048684, "grad_norm": 0.04752043415446419, "learning_rate": 0.00018088703717270097, "loss": 0.8578, "step": 61830 }, { "epoch": 1.0856932179286856, "grad_norm": 0.04724508712690143, "learning_rate": 0.00018088045486125707, "loss": 0.851, "step": 61840 }, { "epoch": 1.085868782808687, "grad_norm": 0.07630966103327565, "learning_rate": 0.00018087387153771254, "loss": 0.8585, "step": 61850 }, { "epoch": 1.0860443476886883, "grad_norm": 0.06499777699630122, "learning_rate": 0.00018086728720215074, "loss": 0.8524, "step": 61860 }, { "epoch": 1.0862199125686898, "grad_norm": 0.06458319472080983, "learning_rate": 0.00018086070185465507, "loss": 0.8491, "step": 61870 }, { "epoch": 1.0863954774486912, "grad_norm": 0.06638378603018875, "learning_rate": 0.00018085411549530902, "loss": 0.8514, "step": 61880 }, { "epoch": 1.0865710423286925, "grad_norm": 0.07080928967134459, "learning_rate": 0.00018084752812419603, "loss": 0.8459, "step": 61890 }, { "epoch": 1.086746607208694, "grad_norm": 0.07287026211078547, "learning_rate": 0.00018084093974139956, "loss": 0.8483, "step": 61900 }, { "epoch": 1.0869221720886955, "grad_norm": 0.08503901361811703, "learning_rate": 0.00018083435034700306, "loss": 0.8588, "step": 61910 }, { "epoch": 1.0870977369686967, "grad_norm": 0.07827749218119812, "learning_rate": 0.00018082775994109006, "loss": 0.8526, "step": 61920 }, { "epoch": 1.0872733018486982, "grad_norm": 0.05418022735514971, "learning_rate": 0.00018082116852374407, "loss": 0.8532, "step": 61930 }, { "epoch": 1.0874488667286997, "grad_norm": 0.0683115943138406, "learning_rate": 0.00018081457609504852, "loss": 0.8536, "step": 61940 }, { "epoch": 1.087624431608701, "grad_norm": 0.05843054249545686, "learning_rate": 0.00018080798265508705, "loss": 0.844, "step": 61950 }, { "epoch": 1.0877999964887024, "grad_norm": 0.059790847787108005, "learning_rate": 0.0001808013882039431, "loss": 0.8512, "step": 61960 }, { "epoch": 1.0879755613687039, "grad_norm": 0.05023299749625879, "learning_rate": 0.0001807947927417003, "loss": 0.8478, "step": 61970 }, { "epoch": 1.0881511262487051, "grad_norm": 0.08044081902243516, "learning_rate": 0.00018078819626844215, "loss": 0.8529, "step": 61980 }, { "epoch": 1.0883266911287066, "grad_norm": 0.06422885927349552, "learning_rate": 0.00018078159878425227, "loss": 0.8467, "step": 61990 }, { "epoch": 1.088502256008708, "grad_norm": 0.06155370304233618, "learning_rate": 0.00018077500028921428, "loss": 0.8414, "step": 62000 }, { "epoch": 1.0886778208887093, "grad_norm": 0.05410893706721108, "learning_rate": 0.00018076840078341168, "loss": 0.8482, "step": 62010 }, { "epoch": 1.0888533857687108, "grad_norm": 0.06082758702504676, "learning_rate": 0.0001807618002669282, "loss": 0.8522, "step": 62020 }, { "epoch": 1.0890289506487123, "grad_norm": 0.062640904931616, "learning_rate": 0.00018075519873984744, "loss": 0.8491, "step": 62030 }, { "epoch": 1.0892045155287136, "grad_norm": 0.06820114942737333, "learning_rate": 0.00018074859620225298, "loss": 0.8493, "step": 62040 }, { "epoch": 1.089380080408715, "grad_norm": 0.05791195640913787, "learning_rate": 0.0001807419926542286, "loss": 0.8514, "step": 62050 }, { "epoch": 1.0895556452887165, "grad_norm": 0.0615979206538511, "learning_rate": 0.00018073538809585776, "loss": 0.8507, "step": 62060 }, { "epoch": 1.0897312101687178, "grad_norm": 0.06396255850589375, "learning_rate": 0.0001807287825272244, "loss": 0.8516, "step": 62070 }, { "epoch": 1.0899067750487192, "grad_norm": 0.05060906550496095, "learning_rate": 0.00018072217594841203, "loss": 0.8527, "step": 62080 }, { "epoch": 1.0900823399287207, "grad_norm": 0.08157992451739296, "learning_rate": 0.00018071556835950442, "loss": 0.8493, "step": 62090 }, { "epoch": 1.090257904808722, "grad_norm": 0.056258443908712895, "learning_rate": 0.00018070895976058526, "loss": 0.8559, "step": 62100 }, { "epoch": 1.0904334696887235, "grad_norm": 0.04754357476679131, "learning_rate": 0.00018070235015173834, "loss": 0.8507, "step": 62110 }, { "epoch": 1.090609034568725, "grad_norm": 0.04978886466317822, "learning_rate": 0.00018069573953304735, "loss": 0.8497, "step": 62120 }, { "epoch": 1.0907845994487262, "grad_norm": 0.04939258716084147, "learning_rate": 0.00018068912790459606, "loss": 0.8545, "step": 62130 }, { "epoch": 1.0909601643287277, "grad_norm": 0.07184718545498965, "learning_rate": 0.00018068251526646826, "loss": 0.8546, "step": 62140 }, { "epoch": 1.0911357292087291, "grad_norm": 0.06334322503174526, "learning_rate": 0.0001806759016187477, "loss": 0.8466, "step": 62150 }, { "epoch": 1.0913112940887304, "grad_norm": 0.051934476821033526, "learning_rate": 0.00018066928696151822, "loss": 0.8503, "step": 62160 }, { "epoch": 1.0914868589687319, "grad_norm": 0.06583014769441684, "learning_rate": 0.0001806626712948636, "loss": 0.8435, "step": 62170 }, { "epoch": 1.0916624238487334, "grad_norm": 0.06287177525421733, "learning_rate": 0.00018065605461886766, "loss": 0.8501, "step": 62180 }, { "epoch": 1.0918379887287346, "grad_norm": 0.05545856932862779, "learning_rate": 0.00018064943693361425, "loss": 0.8512, "step": 62190 }, { "epoch": 1.092013553608736, "grad_norm": 0.06928479371450576, "learning_rate": 0.00018064281823918722, "loss": 0.8487, "step": 62200 }, { "epoch": 1.0921891184887376, "grad_norm": 0.06642067246293469, "learning_rate": 0.0001806361985356704, "loss": 0.8428, "step": 62210 }, { "epoch": 1.0923646833687388, "grad_norm": 0.06768775913397528, "learning_rate": 0.00018062957782314767, "loss": 0.8474, "step": 62220 }, { "epoch": 1.0925402482487403, "grad_norm": 0.059902596412086284, "learning_rate": 0.00018062295610170296, "loss": 0.8543, "step": 62230 }, { "epoch": 1.0927158131287418, "grad_norm": 0.07683787231397832, "learning_rate": 0.00018061633337142013, "loss": 0.8476, "step": 62240 }, { "epoch": 1.092891378008743, "grad_norm": 0.06383171922958475, "learning_rate": 0.00018060970963238305, "loss": 0.8485, "step": 62250 }, { "epoch": 1.0930669428887445, "grad_norm": 0.07517493995232265, "learning_rate": 0.00018060308488467578, "loss": 0.8524, "step": 62260 }, { "epoch": 1.093242507768746, "grad_norm": 0.04778258484979142, "learning_rate": 0.0001805964591283821, "loss": 0.8475, "step": 62270 }, { "epoch": 1.0934180726487472, "grad_norm": 0.05109455246665613, "learning_rate": 0.00018058983236358603, "loss": 0.8445, "step": 62280 }, { "epoch": 1.0935936375287487, "grad_norm": 0.06371268664659115, "learning_rate": 0.00018058320459037156, "loss": 0.8519, "step": 62290 }, { "epoch": 1.0937692024087502, "grad_norm": 0.0810717546056745, "learning_rate": 0.00018057657580882263, "loss": 0.8497, "step": 62300 }, { "epoch": 1.0939447672887517, "grad_norm": 0.0632514110472606, "learning_rate": 0.0001805699460190232, "loss": 0.8524, "step": 62310 }, { "epoch": 1.094120332168753, "grad_norm": 0.08294901567916205, "learning_rate": 0.00018056331522105736, "loss": 0.8478, "step": 62320 }, { "epoch": 1.0942958970487544, "grad_norm": 0.06059212974565299, "learning_rate": 0.00018055668341500903, "loss": 0.8444, "step": 62330 }, { "epoch": 1.0944714619287557, "grad_norm": 0.05243597581702847, "learning_rate": 0.0001805500506009623, "loss": 0.8486, "step": 62340 }, { "epoch": 1.0946470268087571, "grad_norm": 0.05784115910656347, "learning_rate": 0.00018054341677900117, "loss": 0.8493, "step": 62350 }, { "epoch": 1.0948225916887586, "grad_norm": 0.07022938987935388, "learning_rate": 0.0001805367819492097, "loss": 0.8465, "step": 62360 }, { "epoch": 1.09499815656876, "grad_norm": 0.06461527544835577, "learning_rate": 0.00018053014611167195, "loss": 0.8557, "step": 62370 }, { "epoch": 1.0951737214487614, "grad_norm": 0.06056698767609542, "learning_rate": 0.00018052350926647202, "loss": 0.8534, "step": 62380 }, { "epoch": 1.0953492863287628, "grad_norm": 0.08833395541729215, "learning_rate": 0.00018051687141369397, "loss": 0.8453, "step": 62390 }, { "epoch": 1.095524851208764, "grad_norm": 0.0504368964767674, "learning_rate": 0.00018051023255342193, "loss": 0.8544, "step": 62400 }, { "epoch": 1.0957004160887656, "grad_norm": 0.07123460281915148, "learning_rate": 0.00018050359268574002, "loss": 0.8613, "step": 62410 }, { "epoch": 1.095875980968767, "grad_norm": 0.06489886044733674, "learning_rate": 0.00018049695181073235, "loss": 0.8531, "step": 62420 }, { "epoch": 1.0960515458487685, "grad_norm": 0.048775178602669486, "learning_rate": 0.00018049030992848308, "loss": 0.8522, "step": 62430 }, { "epoch": 1.0962271107287698, "grad_norm": 0.06020943606535469, "learning_rate": 0.00018048366703907634, "loss": 0.8491, "step": 62440 }, { "epoch": 1.0964026756087712, "grad_norm": 0.09788900778500571, "learning_rate": 0.00018047702314259628, "loss": 0.8542, "step": 62450 }, { "epoch": 1.0965782404887727, "grad_norm": 0.05899009803508139, "learning_rate": 0.00018047037823912712, "loss": 0.8504, "step": 62460 }, { "epoch": 1.096753805368774, "grad_norm": 0.06939540620152164, "learning_rate": 0.00018046373232875304, "loss": 0.8484, "step": 62470 }, { "epoch": 1.0969293702487755, "grad_norm": 0.052364327132331204, "learning_rate": 0.00018045708541155825, "loss": 0.8471, "step": 62480 }, { "epoch": 1.097104935128777, "grad_norm": 0.05800470555926506, "learning_rate": 0.00018045043748762693, "loss": 0.8551, "step": 62490 }, { "epoch": 1.0972805000087782, "grad_norm": 0.04639319789121787, "learning_rate": 0.00018044378855704336, "loss": 0.8554, "step": 62500 }, { "epoch": 1.0974560648887797, "grad_norm": 0.0646342589031617, "learning_rate": 0.00018043713861989178, "loss": 0.848, "step": 62510 }, { "epoch": 1.0976316297687811, "grad_norm": 0.06315564122072045, "learning_rate": 0.00018043048767625643, "loss": 0.8565, "step": 62520 }, { "epoch": 1.0978071946487824, "grad_norm": 0.06466841618070558, "learning_rate": 0.00018042383572622157, "loss": 0.8541, "step": 62530 }, { "epoch": 1.0979827595287839, "grad_norm": 0.060684908154012326, "learning_rate": 0.00018041718276987144, "loss": 0.8527, "step": 62540 }, { "epoch": 1.0981583244087854, "grad_norm": 0.07587011901486988, "learning_rate": 0.00018041052880729043, "loss": 0.8506, "step": 62550 }, { "epoch": 1.0983338892887866, "grad_norm": 0.054362218018544815, "learning_rate": 0.0001804038738385628, "loss": 0.8547, "step": 62560 }, { "epoch": 1.098509454168788, "grad_norm": 0.06182183298292023, "learning_rate": 0.0001803972178637729, "loss": 0.8439, "step": 62570 }, { "epoch": 1.0986850190487896, "grad_norm": 0.07140853584081713, "learning_rate": 0.000180390560883005, "loss": 0.8539, "step": 62580 }, { "epoch": 1.0988605839287908, "grad_norm": 0.061352432974223846, "learning_rate": 0.00018038390289634346, "loss": 0.8534, "step": 62590 }, { "epoch": 1.0990361488087923, "grad_norm": 0.05323662902742413, "learning_rate": 0.00018037724390387267, "loss": 0.8447, "step": 62600 }, { "epoch": 1.0992117136887938, "grad_norm": 0.056477550406711326, "learning_rate": 0.000180370583905677, "loss": 0.8501, "step": 62610 }, { "epoch": 1.099387278568795, "grad_norm": 0.07379514660580348, "learning_rate": 0.0001803639229018408, "loss": 0.8488, "step": 62620 }, { "epoch": 1.0995628434487965, "grad_norm": 0.07947353526632331, "learning_rate": 0.00018035726089244854, "loss": 0.8506, "step": 62630 }, { "epoch": 1.099738408328798, "grad_norm": 0.06633609644949286, "learning_rate": 0.00018035059787758454, "loss": 0.8454, "step": 62640 }, { "epoch": 1.0999139732087992, "grad_norm": 0.05061086776793537, "learning_rate": 0.00018034393385733322, "loss": 0.8445, "step": 62650 }, { "epoch": 1.1000895380888007, "grad_norm": 0.0923237645197295, "learning_rate": 0.0001803372688317791, "loss": 0.854, "step": 62660 }, { "epoch": 1.1002651029688022, "grad_norm": 0.053915707627111, "learning_rate": 0.00018033060280100657, "loss": 0.851, "step": 62670 }, { "epoch": 1.1004406678488035, "grad_norm": 0.06730922242750162, "learning_rate": 0.00018032393576510007, "loss": 0.8442, "step": 62680 }, { "epoch": 1.100616232728805, "grad_norm": 0.05121199540074441, "learning_rate": 0.0001803172677241441, "loss": 0.8491, "step": 62690 }, { "epoch": 1.1007917976088064, "grad_norm": 0.08899140122181526, "learning_rate": 0.00018031059867822322, "loss": 0.8492, "step": 62700 }, { "epoch": 1.1009673624888077, "grad_norm": 0.09365323087330481, "learning_rate": 0.0001803039286274218, "loss": 0.8594, "step": 62710 }, { "epoch": 1.1011429273688091, "grad_norm": 0.05289444686322601, "learning_rate": 0.0001802972575718244, "loss": 0.8493, "step": 62720 }, { "epoch": 1.1013184922488106, "grad_norm": 0.04922873006672966, "learning_rate": 0.00018029058551151557, "loss": 0.8516, "step": 62730 }, { "epoch": 1.1014940571288119, "grad_norm": 0.06375934299782218, "learning_rate": 0.00018028391244657982, "loss": 0.8588, "step": 62740 }, { "epoch": 1.1016696220088134, "grad_norm": 0.06889876938072478, "learning_rate": 0.0001802772383771017, "loss": 0.8563, "step": 62750 }, { "epoch": 1.1018451868888148, "grad_norm": 0.05156762361187861, "learning_rate": 0.00018027056330316578, "loss": 0.8483, "step": 62760 }, { "epoch": 1.102020751768816, "grad_norm": 0.05789546086691783, "learning_rate": 0.0001802638872248566, "loss": 0.8546, "step": 62770 }, { "epoch": 1.1021963166488176, "grad_norm": 0.06991982227207343, "learning_rate": 0.00018025721014225884, "loss": 0.8612, "step": 62780 }, { "epoch": 1.102371881528819, "grad_norm": 0.06941891744316418, "learning_rate": 0.000180250532055457, "loss": 0.8594, "step": 62790 }, { "epoch": 1.1025474464088203, "grad_norm": 0.07064434819914521, "learning_rate": 0.00018024385296453574, "loss": 0.8487, "step": 62800 }, { "epoch": 1.1027230112888218, "grad_norm": 0.06406216162939506, "learning_rate": 0.00018023717286957966, "loss": 0.85, "step": 62810 }, { "epoch": 1.1028985761688233, "grad_norm": 0.06164652363047016, "learning_rate": 0.00018023049177067345, "loss": 0.8503, "step": 62820 }, { "epoch": 1.1030741410488245, "grad_norm": 0.05655805806531401, "learning_rate": 0.00018022380966790168, "loss": 0.8508, "step": 62830 }, { "epoch": 1.103249705928826, "grad_norm": 0.05893699772832099, "learning_rate": 0.0001802171265613491, "loss": 0.8492, "step": 62840 }, { "epoch": 1.1034252708088275, "grad_norm": 0.0704355296045491, "learning_rate": 0.0001802104424511003, "loss": 0.8475, "step": 62850 }, { "epoch": 1.1036008356888287, "grad_norm": 0.059920991490145264, "learning_rate": 0.00018020375733724005, "loss": 0.854, "step": 62860 }, { "epoch": 1.1037764005688302, "grad_norm": 0.08096592676760217, "learning_rate": 0.000180197071219853, "loss": 0.8514, "step": 62870 }, { "epoch": 1.1039519654488317, "grad_norm": 0.05561686236379571, "learning_rate": 0.00018019038409902388, "loss": 0.8548, "step": 62880 }, { "epoch": 1.104127530328833, "grad_norm": 0.04836728217699151, "learning_rate": 0.0001801836959748374, "loss": 0.8508, "step": 62890 }, { "epoch": 1.1043030952088344, "grad_norm": 0.06222220608742484, "learning_rate": 0.00018017700684737836, "loss": 0.8484, "step": 62900 }, { "epoch": 1.1044786600888359, "grad_norm": 0.08863599643461202, "learning_rate": 0.00018017031671673143, "loss": 0.8419, "step": 62910 }, { "epoch": 1.1046542249688371, "grad_norm": 0.06418956462334477, "learning_rate": 0.00018016362558298144, "loss": 0.8504, "step": 62920 }, { "epoch": 1.1048297898488386, "grad_norm": 0.04990723683174192, "learning_rate": 0.00018015693344621312, "loss": 0.8488, "step": 62930 }, { "epoch": 1.10500535472884, "grad_norm": 0.06584887878347451, "learning_rate": 0.00018015024030651128, "loss": 0.8428, "step": 62940 }, { "epoch": 1.1051809196088414, "grad_norm": 0.11972151271772306, "learning_rate": 0.00018014354616396075, "loss": 0.8554, "step": 62950 }, { "epoch": 1.1053564844888428, "grad_norm": 0.07607075459381887, "learning_rate": 0.00018013685101864635, "loss": 0.8464, "step": 62960 }, { "epoch": 1.1055320493688443, "grad_norm": 0.05442022113597486, "learning_rate": 0.00018013015487065284, "loss": 0.8485, "step": 62970 }, { "epoch": 1.1057076142488456, "grad_norm": 0.06839038394430898, "learning_rate": 0.00018012345772006508, "loss": 0.8453, "step": 62980 }, { "epoch": 1.105883179128847, "grad_norm": 0.06193393867733681, "learning_rate": 0.00018011675956696797, "loss": 0.8511, "step": 62990 }, { "epoch": 1.1060587440088485, "grad_norm": 0.11295017275503236, "learning_rate": 0.00018011006041144636, "loss": 0.848, "step": 63000 }, { "epoch": 1.1062343088888498, "grad_norm": 0.05425598863892647, "learning_rate": 0.0001801033602535851, "loss": 0.8473, "step": 63010 }, { "epoch": 1.1064098737688512, "grad_norm": 0.06241776887133908, "learning_rate": 0.00018009665909346915, "loss": 0.8473, "step": 63020 }, { "epoch": 1.1065854386488527, "grad_norm": 0.05766296909913649, "learning_rate": 0.00018008995693118333, "loss": 0.8461, "step": 63030 }, { "epoch": 1.1067610035288542, "grad_norm": 0.055452190402101274, "learning_rate": 0.00018008325376681262, "loss": 0.845, "step": 63040 }, { "epoch": 1.1069365684088555, "grad_norm": 0.06397577342526146, "learning_rate": 0.00018007654960044191, "loss": 0.8482, "step": 63050 }, { "epoch": 1.107112133288857, "grad_norm": 0.07827388631886845, "learning_rate": 0.0001800698444321562, "loss": 0.8485, "step": 63060 }, { "epoch": 1.1072876981688582, "grad_norm": 0.07395233042588756, "learning_rate": 0.00018006313826204038, "loss": 0.8454, "step": 63070 }, { "epoch": 1.1074632630488597, "grad_norm": 0.045638669454953415, "learning_rate": 0.00018005643109017942, "loss": 0.847, "step": 63080 }, { "epoch": 1.1076388279288611, "grad_norm": 0.061520712755089225, "learning_rate": 0.00018004972291665836, "loss": 0.845, "step": 63090 }, { "epoch": 1.1078143928088626, "grad_norm": 0.05332700926653336, "learning_rate": 0.00018004301374156215, "loss": 0.8544, "step": 63100 }, { "epoch": 1.1079899576888639, "grad_norm": 0.06567077990023226, "learning_rate": 0.00018003630356497582, "loss": 0.8551, "step": 63110 }, { "epoch": 1.1081655225688654, "grad_norm": 0.0880887561139268, "learning_rate": 0.00018002959238698436, "loss": 0.8451, "step": 63120 }, { "epoch": 1.1083410874488666, "grad_norm": 0.04606076345563506, "learning_rate": 0.00018002288020767284, "loss": 0.8504, "step": 63130 }, { "epoch": 1.108516652328868, "grad_norm": 0.052002597272976576, "learning_rate": 0.00018001616702712628, "loss": 0.8591, "step": 63140 }, { "epoch": 1.1086922172088696, "grad_norm": 0.06607111486300729, "learning_rate": 0.00018000945284542974, "loss": 0.8478, "step": 63150 }, { "epoch": 1.108867782088871, "grad_norm": 0.04730530309219683, "learning_rate": 0.00018000273766266827, "loss": 0.8485, "step": 63160 }, { "epoch": 1.1090433469688723, "grad_norm": 0.06268018671524156, "learning_rate": 0.00017999602147892698, "loss": 0.8542, "step": 63170 }, { "epoch": 1.1092189118488738, "grad_norm": 0.04987068547251352, "learning_rate": 0.00017998930429429094, "loss": 0.8442, "step": 63180 }, { "epoch": 1.1093944767288753, "grad_norm": 0.05065291536528936, "learning_rate": 0.00017998258610884532, "loss": 0.8528, "step": 63190 }, { "epoch": 1.1095700416088765, "grad_norm": 0.0632407934084691, "learning_rate": 0.0001799758669226752, "loss": 0.8531, "step": 63200 }, { "epoch": 1.109745606488878, "grad_norm": 0.06632721114776366, "learning_rate": 0.00017996914673586566, "loss": 0.8515, "step": 63210 }, { "epoch": 1.1099211713688795, "grad_norm": 0.07697551963593856, "learning_rate": 0.00017996242554850194, "loss": 0.8494, "step": 63220 }, { "epoch": 1.1100967362488807, "grad_norm": 0.060369546870639536, "learning_rate": 0.0001799557033606691, "loss": 0.8392, "step": 63230 }, { "epoch": 1.1102723011288822, "grad_norm": 0.070138534515946, "learning_rate": 0.00017994898017245243, "loss": 0.8455, "step": 63240 }, { "epoch": 1.1104478660088837, "grad_norm": 0.05315433463846012, "learning_rate": 0.000179942255983937, "loss": 0.8482, "step": 63250 }, { "epoch": 1.110623430888885, "grad_norm": 0.08259724321906353, "learning_rate": 0.0001799355307952081, "loss": 0.8497, "step": 63260 }, { "epoch": 1.1107989957688864, "grad_norm": 0.0494309506364185, "learning_rate": 0.00017992880460635085, "loss": 0.8581, "step": 63270 }, { "epoch": 1.1109745606488879, "grad_norm": 0.05489959474878671, "learning_rate": 0.00017992207741745057, "loss": 0.8499, "step": 63280 }, { "epoch": 1.1111501255288891, "grad_norm": 0.0620599687597512, "learning_rate": 0.00017991534922859244, "loss": 0.8471, "step": 63290 }, { "epoch": 1.1113256904088906, "grad_norm": 0.07629052351182475, "learning_rate": 0.00017990862003986168, "loss": 0.8466, "step": 63300 }, { "epoch": 1.111501255288892, "grad_norm": 0.10185522738588536, "learning_rate": 0.0001799018898513436, "loss": 0.8586, "step": 63310 }, { "epoch": 1.1116768201688934, "grad_norm": 0.061964786413389504, "learning_rate": 0.00017989515866312345, "loss": 0.8479, "step": 63320 }, { "epoch": 1.1118523850488948, "grad_norm": 0.05340941497823469, "learning_rate": 0.00017988842647528652, "loss": 0.8596, "step": 63330 }, { "epoch": 1.1120279499288963, "grad_norm": 0.06763126841374276, "learning_rate": 0.0001798816932879181, "loss": 0.8477, "step": 63340 }, { "epoch": 1.1122035148088976, "grad_norm": 0.06589109557840915, "learning_rate": 0.00017987495910110353, "loss": 0.8501, "step": 63350 }, { "epoch": 1.112379079688899, "grad_norm": 0.05755379930512501, "learning_rate": 0.0001798682239149281, "loss": 0.8558, "step": 63360 }, { "epoch": 1.1125546445689005, "grad_norm": 0.06497907940661354, "learning_rate": 0.00017986148772947716, "loss": 0.851, "step": 63370 }, { "epoch": 1.1127302094489018, "grad_norm": 0.06473328440362316, "learning_rate": 0.00017985475054483604, "loss": 0.8524, "step": 63380 }, { "epoch": 1.1129057743289033, "grad_norm": 0.08054536979728731, "learning_rate": 0.00017984801236109013, "loss": 0.8491, "step": 63390 }, { "epoch": 1.1130813392089047, "grad_norm": 0.05248842397515805, "learning_rate": 0.00017984127317832477, "loss": 0.8474, "step": 63400 }, { "epoch": 1.113256904088906, "grad_norm": 0.04922428781404128, "learning_rate": 0.0001798345329966254, "loss": 0.8457, "step": 63410 }, { "epoch": 1.1134324689689075, "grad_norm": 0.05988162075506045, "learning_rate": 0.00017982779181607738, "loss": 0.8497, "step": 63420 }, { "epoch": 1.113608033848909, "grad_norm": 0.06425647352251809, "learning_rate": 0.00017982104963676614, "loss": 0.8474, "step": 63430 }, { "epoch": 1.1137835987289102, "grad_norm": 0.09163917988138723, "learning_rate": 0.0001798143064587771, "loss": 0.8491, "step": 63440 }, { "epoch": 1.1139591636089117, "grad_norm": 0.07587613444955431, "learning_rate": 0.00017980756228219567, "loss": 0.8538, "step": 63450 }, { "epoch": 1.1141347284889132, "grad_norm": 0.055592415185579794, "learning_rate": 0.0001798008171071073, "loss": 0.8504, "step": 63460 }, { "epoch": 1.1143102933689144, "grad_norm": 0.057643432323901116, "learning_rate": 0.00017979407093359753, "loss": 0.8464, "step": 63470 }, { "epoch": 1.1144858582489159, "grad_norm": 0.05439464235901788, "learning_rate": 0.00017978732376175175, "loss": 0.8511, "step": 63480 }, { "epoch": 1.1146614231289174, "grad_norm": 0.05489035101905239, "learning_rate": 0.0001797805755916555, "loss": 0.8537, "step": 63490 }, { "epoch": 1.1148369880089186, "grad_norm": 0.045633988544810905, "learning_rate": 0.00017977382642339422, "loss": 0.849, "step": 63500 }, { "epoch": 1.11501255288892, "grad_norm": 0.06463972821484633, "learning_rate": 0.0001797670762570535, "loss": 0.854, "step": 63510 }, { "epoch": 1.1151881177689216, "grad_norm": 0.06529186837019142, "learning_rate": 0.00017976032509271878, "loss": 0.8526, "step": 63520 }, { "epoch": 1.1153636826489228, "grad_norm": 0.055228396535086356, "learning_rate": 0.0001797535729304757, "loss": 0.8477, "step": 63530 }, { "epoch": 1.1155392475289243, "grad_norm": 0.06977386670840079, "learning_rate": 0.00017974681977040973, "loss": 0.8547, "step": 63540 }, { "epoch": 1.1157148124089258, "grad_norm": 0.07482228447544345, "learning_rate": 0.00017974006561260648, "loss": 0.8511, "step": 63550 }, { "epoch": 1.115890377288927, "grad_norm": 0.058189156271562605, "learning_rate": 0.0001797333104571515, "loss": 0.8513, "step": 63560 }, { "epoch": 1.1160659421689285, "grad_norm": 0.0606953135572274, "learning_rate": 0.00017972655430413038, "loss": 0.8455, "step": 63570 }, { "epoch": 1.11624150704893, "grad_norm": 0.06540286732274568, "learning_rate": 0.00017971979715362873, "loss": 0.8471, "step": 63580 }, { "epoch": 1.1164170719289312, "grad_norm": 0.054966008055933484, "learning_rate": 0.00017971303900573218, "loss": 0.8492, "step": 63590 }, { "epoch": 1.1165926368089327, "grad_norm": 0.04253565300491529, "learning_rate": 0.00017970627986052632, "loss": 0.8479, "step": 63600 }, { "epoch": 1.1167682016889342, "grad_norm": 0.06370850176978006, "learning_rate": 0.0001796995197180968, "loss": 0.8538, "step": 63610 }, { "epoch": 1.1169437665689355, "grad_norm": 0.06748679661476702, "learning_rate": 0.00017969275857852932, "loss": 0.8535, "step": 63620 }, { "epoch": 1.117119331448937, "grad_norm": 0.07719412918408544, "learning_rate": 0.00017968599644190946, "loss": 0.8564, "step": 63630 }, { "epoch": 1.1172948963289384, "grad_norm": 0.052356051567401085, "learning_rate": 0.000179679233308323, "loss": 0.8521, "step": 63640 }, { "epoch": 1.1174704612089397, "grad_norm": 0.04828840376406467, "learning_rate": 0.00017967246917785555, "loss": 0.8505, "step": 63650 }, { "epoch": 1.1176460260889411, "grad_norm": 0.07468008035109112, "learning_rate": 0.00017966570405059283, "loss": 0.8496, "step": 63660 }, { "epoch": 1.1178215909689426, "grad_norm": 0.054770199149431674, "learning_rate": 0.00017965893792662056, "loss": 0.8543, "step": 63670 }, { "epoch": 1.1179971558489439, "grad_norm": 0.057438741626564996, "learning_rate": 0.0001796521708060245, "loss": 0.8632, "step": 63680 }, { "epoch": 1.1181727207289454, "grad_norm": 0.04785928792656027, "learning_rate": 0.00017964540268889035, "loss": 0.8522, "step": 63690 }, { "epoch": 1.1183482856089468, "grad_norm": 0.050807078002749646, "learning_rate": 0.00017963863357530385, "loss": 0.85, "step": 63700 }, { "epoch": 1.118523850488948, "grad_norm": 0.052668540826652545, "learning_rate": 0.00017963186346535082, "loss": 0.8511, "step": 63710 }, { "epoch": 1.1186994153689496, "grad_norm": 0.06840474290110996, "learning_rate": 0.000179625092359117, "loss": 0.8449, "step": 63720 }, { "epoch": 1.118874980248951, "grad_norm": 0.06643656486381799, "learning_rate": 0.00017961832025668818, "loss": 0.8494, "step": 63730 }, { "epoch": 1.1190505451289523, "grad_norm": 0.0647990380535818, "learning_rate": 0.0001796115471581502, "loss": 0.8455, "step": 63740 }, { "epoch": 1.1192261100089538, "grad_norm": 0.05093674943920201, "learning_rate": 0.0001796047730635888, "loss": 0.8602, "step": 63750 }, { "epoch": 1.1194016748889553, "grad_norm": 0.07167989582617355, "learning_rate": 0.00017959799797308994, "loss": 0.8467, "step": 63760 }, { "epoch": 1.1195772397689567, "grad_norm": 0.054552500018295834, "learning_rate": 0.00017959122188673936, "loss": 0.8454, "step": 63770 }, { "epoch": 1.119752804648958, "grad_norm": 0.06017898660213819, "learning_rate": 0.0001795844448046229, "loss": 0.8474, "step": 63780 }, { "epoch": 1.1199283695289595, "grad_norm": 0.06797131122057282, "learning_rate": 0.0001795776667268265, "loss": 0.8604, "step": 63790 }, { "epoch": 1.1201039344089607, "grad_norm": 0.058895838436704526, "learning_rate": 0.00017957088765343596, "loss": 0.8522, "step": 63800 }, { "epoch": 1.1202794992889622, "grad_norm": 0.06010545814142448, "learning_rate": 0.00017956410758453726, "loss": 0.8483, "step": 63810 }, { "epoch": 1.1204550641689637, "grad_norm": 0.056630815566001545, "learning_rate": 0.00017955732652021625, "loss": 0.8474, "step": 63820 }, { "epoch": 1.1206306290489652, "grad_norm": 0.05695779608103413, "learning_rate": 0.0001795505444605588, "loss": 0.8474, "step": 63830 }, { "epoch": 1.1208061939289664, "grad_norm": 0.05134219592439935, "learning_rate": 0.00017954376140565096, "loss": 0.851, "step": 63840 }, { "epoch": 1.1209817588089679, "grad_norm": 0.050805892462023106, "learning_rate": 0.0001795369773555786, "loss": 0.8516, "step": 63850 }, { "epoch": 1.1211573236889691, "grad_norm": 0.07672570583304174, "learning_rate": 0.00017953019231042766, "loss": 0.85, "step": 63860 }, { "epoch": 1.1213328885689706, "grad_norm": 0.06539542807973796, "learning_rate": 0.0001795234062702842, "loss": 0.8528, "step": 63870 }, { "epoch": 1.121508453448972, "grad_norm": 0.0462204765271002, "learning_rate": 0.00017951661923523406, "loss": 0.8484, "step": 63880 }, { "epoch": 1.1216840183289736, "grad_norm": 0.08899826063453344, "learning_rate": 0.00017950983120536333, "loss": 0.8508, "step": 63890 }, { "epoch": 1.1218595832089748, "grad_norm": 0.06570779841109815, "learning_rate": 0.00017950304218075798, "loss": 0.8479, "step": 63900 }, { "epoch": 1.1220351480889763, "grad_norm": 0.10201169511920036, "learning_rate": 0.0001794962521615041, "loss": 0.8526, "step": 63910 }, { "epoch": 1.1222107129689778, "grad_norm": 0.06344461443317778, "learning_rate": 0.0001794894611476876, "loss": 0.8488, "step": 63920 }, { "epoch": 1.122386277848979, "grad_norm": 0.06433479980017592, "learning_rate": 0.00017948266913939462, "loss": 0.8408, "step": 63930 }, { "epoch": 1.1225618427289805, "grad_norm": 0.07547090931739021, "learning_rate": 0.00017947587613671118, "loss": 0.852, "step": 63940 }, { "epoch": 1.122737407608982, "grad_norm": 0.06366747201900605, "learning_rate": 0.0001794690821397233, "loss": 0.8463, "step": 63950 }, { "epoch": 1.1229129724889833, "grad_norm": 0.05412937108067666, "learning_rate": 0.00017946228714851717, "loss": 0.8578, "step": 63960 }, { "epoch": 1.1230885373689847, "grad_norm": 0.05537034430084289, "learning_rate": 0.00017945549116317878, "loss": 0.8532, "step": 63970 }, { "epoch": 1.1232641022489862, "grad_norm": 0.08924424820753646, "learning_rate": 0.00017944869418379432, "loss": 0.848, "step": 63980 }, { "epoch": 1.1234396671289875, "grad_norm": 0.06083656503513563, "learning_rate": 0.00017944189621044984, "loss": 0.8476, "step": 63990 }, { "epoch": 1.123615232008989, "grad_norm": 0.057553205145431025, "learning_rate": 0.00017943509724323154, "loss": 0.8475, "step": 64000 }, { "epoch": 1.1237907968889904, "grad_norm": 0.05636739618677118, "learning_rate": 0.00017942829728222548, "loss": 0.8485, "step": 64010 }, { "epoch": 1.1239663617689917, "grad_norm": 0.04436540993238254, "learning_rate": 0.00017942149632751788, "loss": 0.8522, "step": 64020 }, { "epoch": 1.1241419266489932, "grad_norm": 0.06749653204007962, "learning_rate": 0.0001794146943791949, "loss": 0.8494, "step": 64030 }, { "epoch": 1.1243174915289946, "grad_norm": 0.059252310061947706, "learning_rate": 0.00017940789143734272, "loss": 0.8588, "step": 64040 }, { "epoch": 1.1244930564089959, "grad_norm": 0.0754921639203431, "learning_rate": 0.0001794010875020475, "loss": 0.8563, "step": 64050 }, { "epoch": 1.1246686212889974, "grad_norm": 0.07454543476261988, "learning_rate": 0.00017939428257339552, "loss": 0.8518, "step": 64060 }, { "epoch": 1.1248441861689988, "grad_norm": 0.06136797736512024, "learning_rate": 0.0001793874766514729, "loss": 0.8449, "step": 64070 }, { "epoch": 1.125019751049, "grad_norm": 0.05524850206837931, "learning_rate": 0.00017938066973636596, "loss": 0.8544, "step": 64080 }, { "epoch": 1.1251953159290016, "grad_norm": 0.07724745197306429, "learning_rate": 0.0001793738618281609, "loss": 0.85, "step": 64090 }, { "epoch": 1.125370880809003, "grad_norm": 0.07802334177946706, "learning_rate": 0.00017936705292694402, "loss": 0.8531, "step": 64100 }, { "epoch": 1.1255464456890043, "grad_norm": 0.0980805322732231, "learning_rate": 0.00017936024303280156, "loss": 0.8517, "step": 64110 }, { "epoch": 1.1257220105690058, "grad_norm": 0.06597614040998895, "learning_rate": 0.00017935343214581974, "loss": 0.8531, "step": 64120 }, { "epoch": 1.1258975754490073, "grad_norm": 0.07848159276408537, "learning_rate": 0.00017934662026608498, "loss": 0.8491, "step": 64130 }, { "epoch": 1.1260731403290085, "grad_norm": 0.0934104580981867, "learning_rate": 0.0001793398073936835, "loss": 0.85, "step": 64140 }, { "epoch": 1.12624870520901, "grad_norm": 0.05351224582750287, "learning_rate": 0.00017933299352870165, "loss": 0.8526, "step": 64150 }, { "epoch": 1.1264242700890115, "grad_norm": 0.07484598344893574, "learning_rate": 0.00017932617867122576, "loss": 0.855, "step": 64160 }, { "epoch": 1.1265998349690127, "grad_norm": 0.06095429146051287, "learning_rate": 0.0001793193628213422, "loss": 0.8511, "step": 64170 }, { "epoch": 1.1267753998490142, "grad_norm": 0.0814697978803053, "learning_rate": 0.00017931254597913727, "loss": 0.8522, "step": 64180 }, { "epoch": 1.1269509647290157, "grad_norm": 0.04642813783533168, "learning_rate": 0.00017930572814469736, "loss": 0.8486, "step": 64190 }, { "epoch": 1.127126529609017, "grad_norm": 0.07575311057387364, "learning_rate": 0.00017929890931810892, "loss": 0.8459, "step": 64200 }, { "epoch": 1.1273020944890184, "grad_norm": 0.058050937504048716, "learning_rate": 0.00017929208949945825, "loss": 0.8552, "step": 64210 }, { "epoch": 1.12747765936902, "grad_norm": 0.06337028818505674, "learning_rate": 0.00017928526868883185, "loss": 0.8488, "step": 64220 }, { "epoch": 1.1276532242490211, "grad_norm": 0.055030021595843075, "learning_rate": 0.00017927844688631604, "loss": 0.8491, "step": 64230 }, { "epoch": 1.1278287891290226, "grad_norm": 0.06613403585690923, "learning_rate": 0.00017927162409199734, "loss": 0.8589, "step": 64240 }, { "epoch": 1.128004354009024, "grad_norm": 0.07164951300964377, "learning_rate": 0.00017926480030596212, "loss": 0.8481, "step": 64250 }, { "epoch": 1.1281799188890254, "grad_norm": 0.07371632579140651, "learning_rate": 0.00017925797552829692, "loss": 0.8466, "step": 64260 }, { "epoch": 1.1283554837690268, "grad_norm": 0.08924661464969004, "learning_rate": 0.00017925114975908817, "loss": 0.8575, "step": 64270 }, { "epoch": 1.1285310486490283, "grad_norm": 0.09443891100734772, "learning_rate": 0.00017924432299842234, "loss": 0.854, "step": 64280 }, { "epoch": 1.1287066135290296, "grad_norm": 0.06742019625699383, "learning_rate": 0.00017923749524638593, "loss": 0.8515, "step": 64290 }, { "epoch": 1.128882178409031, "grad_norm": 0.06685700056430416, "learning_rate": 0.0001792306665030655, "loss": 0.8486, "step": 64300 }, { "epoch": 1.1290577432890325, "grad_norm": 0.0866266148296227, "learning_rate": 0.00017922383676854752, "loss": 0.8503, "step": 64310 }, { "epoch": 1.1292333081690338, "grad_norm": 0.06283351195830242, "learning_rate": 0.00017921700604291851, "loss": 0.852, "step": 64320 }, { "epoch": 1.1294088730490353, "grad_norm": 0.06361790580770263, "learning_rate": 0.00017921017432626508, "loss": 0.8451, "step": 64330 }, { "epoch": 1.1295844379290367, "grad_norm": 0.08370812389383761, "learning_rate": 0.0001792033416186737, "loss": 0.8428, "step": 64340 }, { "epoch": 1.129760002809038, "grad_norm": 0.04450841570506968, "learning_rate": 0.00017919650792023103, "loss": 0.8452, "step": 64350 }, { "epoch": 1.1299355676890395, "grad_norm": 0.07117174081116262, "learning_rate": 0.00017918967323102362, "loss": 0.853, "step": 64360 }, { "epoch": 1.130111132569041, "grad_norm": 0.060915954369232694, "learning_rate": 0.00017918283755113807, "loss": 0.851, "step": 64370 }, { "epoch": 1.1302866974490422, "grad_norm": 0.06518989406944396, "learning_rate": 0.000179176000880661, "loss": 0.8575, "step": 64380 }, { "epoch": 1.1304622623290437, "grad_norm": 0.07751870600300023, "learning_rate": 0.00017916916321967898, "loss": 0.854, "step": 64390 }, { "epoch": 1.1306378272090452, "grad_norm": 0.06345096356005113, "learning_rate": 0.0001791623245682787, "loss": 0.8577, "step": 64400 }, { "epoch": 1.1308133920890464, "grad_norm": 0.0689639174664518, "learning_rate": 0.0001791554849265468, "loss": 0.8507, "step": 64410 }, { "epoch": 1.1309889569690479, "grad_norm": 0.062027334642485284, "learning_rate": 0.00017914864429456988, "loss": 0.8569, "step": 64420 }, { "epoch": 1.1311645218490494, "grad_norm": 0.06967239199697572, "learning_rate": 0.0001791418026724347, "loss": 0.8493, "step": 64430 }, { "epoch": 1.1313400867290508, "grad_norm": 0.05711079731577256, "learning_rate": 0.0001791349600602279, "loss": 0.8437, "step": 64440 }, { "epoch": 1.131515651609052, "grad_norm": 0.07575012515068578, "learning_rate": 0.00017912811645803616, "loss": 0.8531, "step": 64450 }, { "epoch": 1.1316912164890536, "grad_norm": 0.07773840431068837, "learning_rate": 0.00017912127186594623, "loss": 0.8476, "step": 64460 }, { "epoch": 1.1318667813690548, "grad_norm": 0.12137045295994249, "learning_rate": 0.0001791144262840448, "loss": 0.8472, "step": 64470 }, { "epoch": 1.1320423462490563, "grad_norm": 0.08501963322417361, "learning_rate": 0.00017910757971241863, "loss": 0.8498, "step": 64480 }, { "epoch": 1.1322179111290578, "grad_norm": 0.08537962872228547, "learning_rate": 0.00017910073215115447, "loss": 0.8582, "step": 64490 }, { "epoch": 1.1323934760090593, "grad_norm": 0.04804774428462465, "learning_rate": 0.00017909388360033904, "loss": 0.8445, "step": 64500 }, { "epoch": 1.1325690408890605, "grad_norm": 0.06128329390485867, "learning_rate": 0.00017908703406005916, "loss": 0.8555, "step": 64510 }, { "epoch": 1.132744605769062, "grad_norm": 0.06303780461330723, "learning_rate": 0.0001790801835304016, "loss": 0.8609, "step": 64520 }, { "epoch": 1.1329201706490633, "grad_norm": 0.049871903247899224, "learning_rate": 0.00017907333201145308, "loss": 0.8476, "step": 64530 }, { "epoch": 1.1330957355290647, "grad_norm": 0.0509589994833353, "learning_rate": 0.00017906647950330053, "loss": 0.8551, "step": 64540 }, { "epoch": 1.1332713004090662, "grad_norm": 0.0702463784231623, "learning_rate": 0.00017905962600603078, "loss": 0.8466, "step": 64550 }, { "epoch": 1.1334468652890677, "grad_norm": 0.055926003659947626, "learning_rate": 0.0001790527715197305, "loss": 0.8453, "step": 64560 }, { "epoch": 1.133622430169069, "grad_norm": 0.05750843439463261, "learning_rate": 0.00017904591604448673, "loss": 0.8397, "step": 64570 }, { "epoch": 1.1337979950490704, "grad_norm": 0.056304781294947766, "learning_rate": 0.00017903905958038623, "loss": 0.8463, "step": 64580 }, { "epoch": 1.1339735599290717, "grad_norm": 0.07655366319004069, "learning_rate": 0.00017903220212751585, "loss": 0.8538, "step": 64590 }, { "epoch": 1.1341491248090732, "grad_norm": 0.056090017525195233, "learning_rate": 0.00017902534368596256, "loss": 0.8491, "step": 64600 }, { "epoch": 1.1343246896890746, "grad_norm": 0.05743415201211292, "learning_rate": 0.00017901848425581318, "loss": 0.8542, "step": 64610 }, { "epoch": 1.134500254569076, "grad_norm": 0.11929489585887838, "learning_rate": 0.00017901162383715467, "loss": 0.8505, "step": 64620 }, { "epoch": 1.1346758194490774, "grad_norm": 0.05365608373816396, "learning_rate": 0.00017900476243007393, "loss": 0.8497, "step": 64630 }, { "epoch": 1.1348513843290788, "grad_norm": 0.07708245504602781, "learning_rate": 0.00017899790003465788, "loss": 0.8491, "step": 64640 }, { "epoch": 1.13502694920908, "grad_norm": 0.05825661688318796, "learning_rate": 0.0001789910366509935, "loss": 0.8528, "step": 64650 }, { "epoch": 1.1352025140890816, "grad_norm": 0.07269608965664046, "learning_rate": 0.00017898417227916777, "loss": 0.8488, "step": 64660 }, { "epoch": 1.135378078969083, "grad_norm": 0.09014235189480956, "learning_rate": 0.0001789773069192676, "loss": 0.8472, "step": 64670 }, { "epoch": 1.1355536438490845, "grad_norm": 0.06881296719177808, "learning_rate": 0.00017897044057138003, "loss": 0.8503, "step": 64680 }, { "epoch": 1.1357292087290858, "grad_norm": 0.061919624039048694, "learning_rate": 0.000178963573235592, "loss": 0.858, "step": 64690 }, { "epoch": 1.1359047736090873, "grad_norm": 0.06065188644466135, "learning_rate": 0.0001789567049119906, "loss": 0.8448, "step": 64700 }, { "epoch": 1.1360803384890887, "grad_norm": 0.05618224909674693, "learning_rate": 0.00017894983560066277, "loss": 0.8551, "step": 64710 }, { "epoch": 1.13625590336909, "grad_norm": 0.0549106281157536, "learning_rate": 0.00017894296530169561, "loss": 0.8525, "step": 64720 }, { "epoch": 1.1364314682490915, "grad_norm": 0.04377383434017827, "learning_rate": 0.0001789360940151761, "loss": 0.852, "step": 64730 }, { "epoch": 1.136607033129093, "grad_norm": 0.04840613942029026, "learning_rate": 0.0001789292217411914, "loss": 0.8458, "step": 64740 }, { "epoch": 1.1367825980090942, "grad_norm": 0.09741409644743666, "learning_rate": 0.0001789223484798285, "loss": 0.8474, "step": 64750 }, { "epoch": 1.1369581628890957, "grad_norm": 0.0688803534816602, "learning_rate": 0.0001789154742311745, "loss": 0.8556, "step": 64760 }, { "epoch": 1.1371337277690972, "grad_norm": 0.06050206656579917, "learning_rate": 0.0001789085989953165, "loss": 0.8477, "step": 64770 }, { "epoch": 1.1373092926490984, "grad_norm": 0.06671176817032749, "learning_rate": 0.00017890172277234168, "loss": 0.8502, "step": 64780 }, { "epoch": 1.1374848575291, "grad_norm": 0.06443575927909653, "learning_rate": 0.00017889484556233705, "loss": 0.8599, "step": 64790 }, { "epoch": 1.1376604224091014, "grad_norm": 0.06873151186502408, "learning_rate": 0.0001788879673653898, "loss": 0.8426, "step": 64800 }, { "epoch": 1.1378359872891026, "grad_norm": 0.056816200813914045, "learning_rate": 0.0001788810881815871, "loss": 0.8533, "step": 64810 }, { "epoch": 1.138011552169104, "grad_norm": 0.06692795526328259, "learning_rate": 0.00017887420801101605, "loss": 0.8553, "step": 64820 }, { "epoch": 1.1381871170491056, "grad_norm": 0.07825063954329145, "learning_rate": 0.0001788673268537639, "loss": 0.8442, "step": 64830 }, { "epoch": 1.1383626819291068, "grad_norm": 0.08974884392498862, "learning_rate": 0.0001788604447099178, "loss": 0.8529, "step": 64840 }, { "epoch": 1.1385382468091083, "grad_norm": 0.05555451662457341, "learning_rate": 0.00017885356157956486, "loss": 0.8488, "step": 64850 }, { "epoch": 1.1387138116891098, "grad_norm": 0.07419935568109584, "learning_rate": 0.00017884667746279246, "loss": 0.8594, "step": 64860 }, { "epoch": 1.138889376569111, "grad_norm": 0.07241989876288825, "learning_rate": 0.0001788397923596877, "loss": 0.8414, "step": 64870 }, { "epoch": 1.1390649414491125, "grad_norm": 0.058514110104034324, "learning_rate": 0.00017883290627033788, "loss": 0.8528, "step": 64880 }, { "epoch": 1.139240506329114, "grad_norm": 0.04853061063301369, "learning_rate": 0.00017882601919483023, "loss": 0.8532, "step": 64890 }, { "epoch": 1.1394160712091153, "grad_norm": 0.08040436128898949, "learning_rate": 0.00017881913113325195, "loss": 0.8543, "step": 64900 }, { "epoch": 1.1395916360891167, "grad_norm": 0.09264796312076681, "learning_rate": 0.0001788122420856904, "loss": 0.8516, "step": 64910 }, { "epoch": 1.1397672009691182, "grad_norm": 0.059667100313423975, "learning_rate": 0.0001788053520522328, "loss": 0.847, "step": 64920 }, { "epoch": 1.1399427658491195, "grad_norm": 0.07524740511393918, "learning_rate": 0.0001787984610329665, "loss": 0.8503, "step": 64930 }, { "epoch": 1.140118330729121, "grad_norm": 0.06625663446651378, "learning_rate": 0.0001787915690279788, "loss": 0.8522, "step": 64940 }, { "epoch": 1.1402938956091224, "grad_norm": 0.061417080641106424, "learning_rate": 0.00017878467603735704, "loss": 0.8465, "step": 64950 }, { "epoch": 1.1404694604891237, "grad_norm": 0.06341511819080986, "learning_rate": 0.00017877778206118846, "loss": 0.8546, "step": 64960 }, { "epoch": 1.1406450253691252, "grad_norm": 0.049282135584748935, "learning_rate": 0.0001787708870995605, "loss": 0.85, "step": 64970 }, { "epoch": 1.1408205902491266, "grad_norm": 0.051013016729853325, "learning_rate": 0.0001787639911525605, "loss": 0.8461, "step": 64980 }, { "epoch": 1.1409961551291279, "grad_norm": 0.07233528856099104, "learning_rate": 0.00017875709422027585, "loss": 0.8549, "step": 64990 }, { "epoch": 1.1411717200091294, "grad_norm": 0.059517333281751836, "learning_rate": 0.00017875019630279385, "loss": 0.8575, "step": 65000 }, { "epoch": 1.1413472848891308, "grad_norm": 0.06874750970607965, "learning_rate": 0.00017874329740020205, "loss": 0.8455, "step": 65010 }, { "epoch": 1.141522849769132, "grad_norm": 0.07630248318196081, "learning_rate": 0.0001787363975125877, "loss": 0.8477, "step": 65020 }, { "epoch": 1.1416984146491336, "grad_norm": 0.04650036900284374, "learning_rate": 0.00017872949664003834, "loss": 0.8517, "step": 65030 }, { "epoch": 1.141873979529135, "grad_norm": 0.05443273047967385, "learning_rate": 0.0001787225947826413, "loss": 0.8459, "step": 65040 }, { "epoch": 1.1420495444091363, "grad_norm": 0.06422070023663035, "learning_rate": 0.00017871569194048416, "loss": 0.8517, "step": 65050 }, { "epoch": 1.1422251092891378, "grad_norm": 0.06780424140532242, "learning_rate": 0.00017870878811365428, "loss": 0.846, "step": 65060 }, { "epoch": 1.1424006741691393, "grad_norm": 0.07474889504490376, "learning_rate": 0.00017870188330223915, "loss": 0.8465, "step": 65070 }, { "epoch": 1.1425762390491405, "grad_norm": 0.05983562406483036, "learning_rate": 0.00017869497750632628, "loss": 0.8434, "step": 65080 }, { "epoch": 1.142751803929142, "grad_norm": 0.08961135539758446, "learning_rate": 0.0001786880707260032, "loss": 0.8513, "step": 65090 }, { "epoch": 1.1429273688091435, "grad_norm": 0.056932888646378466, "learning_rate": 0.00017868116296135728, "loss": 0.8506, "step": 65100 }, { "epoch": 1.1431029336891447, "grad_norm": 0.0657824312930575, "learning_rate": 0.00017867425421247621, "loss": 0.8452, "step": 65110 }, { "epoch": 1.1432784985691462, "grad_norm": 0.07962070235759343, "learning_rate": 0.0001786673444794474, "loss": 0.8508, "step": 65120 }, { "epoch": 1.1434540634491477, "grad_norm": 0.0586930425582206, "learning_rate": 0.0001786604337623585, "loss": 0.8583, "step": 65130 }, { "epoch": 1.143629628329149, "grad_norm": 0.08401509934169699, "learning_rate": 0.00017865352206129702, "loss": 0.8522, "step": 65140 }, { "epoch": 1.1438051932091504, "grad_norm": 0.0521168295431265, "learning_rate": 0.00017864660937635048, "loss": 0.8492, "step": 65150 }, { "epoch": 1.143980758089152, "grad_norm": 0.07806806849333076, "learning_rate": 0.00017863969570760654, "loss": 0.8542, "step": 65160 }, { "epoch": 1.1441563229691534, "grad_norm": 0.042353619799965166, "learning_rate": 0.0001786327810551528, "loss": 0.848, "step": 65170 }, { "epoch": 1.1443318878491546, "grad_norm": 0.07194816461530554, "learning_rate": 0.00017862586541907686, "loss": 0.8466, "step": 65180 }, { "epoch": 1.144507452729156, "grad_norm": 0.048046868198485825, "learning_rate": 0.0001786189487994663, "loss": 0.8564, "step": 65190 }, { "epoch": 1.1446830176091574, "grad_norm": 0.07978523613617172, "learning_rate": 0.00017861203119640882, "loss": 0.8541, "step": 65200 }, { "epoch": 1.1448585824891588, "grad_norm": 0.05346320159863859, "learning_rate": 0.00017860511260999194, "loss": 0.8486, "step": 65210 }, { "epoch": 1.1450341473691603, "grad_norm": 0.05782610333681152, "learning_rate": 0.00017859819304030347, "loss": 0.8474, "step": 65220 }, { "epoch": 1.1452097122491618, "grad_norm": 0.06660565521608956, "learning_rate": 0.00017859127248743103, "loss": 0.8477, "step": 65230 }, { "epoch": 1.145385277129163, "grad_norm": 0.06909388831509862, "learning_rate": 0.00017858435095146228, "loss": 0.8553, "step": 65240 }, { "epoch": 1.1455608420091645, "grad_norm": 0.056537693938721954, "learning_rate": 0.00017857742843248496, "loss": 0.8478, "step": 65250 }, { "epoch": 1.1457364068891658, "grad_norm": 0.046399326564750036, "learning_rate": 0.00017857050493058676, "loss": 0.8464, "step": 65260 }, { "epoch": 1.1459119717691673, "grad_norm": 0.06022187546369767, "learning_rate": 0.00017856358044585535, "loss": 0.8469, "step": 65270 }, { "epoch": 1.1460875366491687, "grad_norm": 0.05087639665262646, "learning_rate": 0.0001785566549783785, "loss": 0.8497, "step": 65280 }, { "epoch": 1.1462631015291702, "grad_norm": 0.059465141112899716, "learning_rate": 0.00017854972852824404, "loss": 0.8531, "step": 65290 }, { "epoch": 1.1464386664091715, "grad_norm": 0.0652573809714605, "learning_rate": 0.0001785428010955396, "loss": 0.8553, "step": 65300 }, { "epoch": 1.146614231289173, "grad_norm": 0.061393227535730246, "learning_rate": 0.00017853587268035304, "loss": 0.8566, "step": 65310 }, { "epoch": 1.1467897961691742, "grad_norm": 0.05826298473634454, "learning_rate": 0.00017852894328277208, "loss": 0.8496, "step": 65320 }, { "epoch": 1.1469653610491757, "grad_norm": 0.09054602092904923, "learning_rate": 0.0001785220129028846, "loss": 0.85, "step": 65330 }, { "epoch": 1.1471409259291772, "grad_norm": 0.04859043337884091, "learning_rate": 0.0001785150815407783, "loss": 0.8571, "step": 65340 }, { "epoch": 1.1473164908091786, "grad_norm": 0.08960439376850579, "learning_rate": 0.0001785081491965411, "loss": 0.8547, "step": 65350 }, { "epoch": 1.14749205568918, "grad_norm": 0.06025314241934363, "learning_rate": 0.00017850121587026076, "loss": 0.8527, "step": 65360 }, { "epoch": 1.1476676205691814, "grad_norm": 0.05595051232837018, "learning_rate": 0.0001784942815620252, "loss": 0.8517, "step": 65370 }, { "epoch": 1.1478431854491826, "grad_norm": 0.06191843871898107, "learning_rate": 0.00017848734627192223, "loss": 0.8514, "step": 65380 }, { "epoch": 1.148018750329184, "grad_norm": 0.06310461945938446, "learning_rate": 0.00017848041000003974, "loss": 0.8412, "step": 65390 }, { "epoch": 1.1481943152091856, "grad_norm": 0.053928681668453715, "learning_rate": 0.0001784734727464656, "loss": 0.8478, "step": 65400 }, { "epoch": 1.148369880089187, "grad_norm": 0.062439336317766186, "learning_rate": 0.00017846653451128772, "loss": 0.8537, "step": 65410 }, { "epoch": 1.1485454449691883, "grad_norm": 0.05598444988455744, "learning_rate": 0.00017845959529459398, "loss": 0.8475, "step": 65420 }, { "epoch": 1.1487210098491898, "grad_norm": 0.04602684452453043, "learning_rate": 0.00017845265509647233, "loss": 0.8487, "step": 65430 }, { "epoch": 1.1488965747291913, "grad_norm": 0.05518051956995834, "learning_rate": 0.0001784457139170107, "loss": 0.8559, "step": 65440 }, { "epoch": 1.1490721396091925, "grad_norm": 0.04828103501914329, "learning_rate": 0.00017843877175629703, "loss": 0.8491, "step": 65450 }, { "epoch": 1.149247704489194, "grad_norm": 0.053014525950073325, "learning_rate": 0.00017843182861441927, "loss": 0.846, "step": 65460 }, { "epoch": 1.1494232693691955, "grad_norm": 0.07219504654956571, "learning_rate": 0.00017842488449146545, "loss": 0.8502, "step": 65470 }, { "epoch": 1.1495988342491967, "grad_norm": 0.06352151320230581, "learning_rate": 0.00017841793938752344, "loss": 0.8492, "step": 65480 }, { "epoch": 1.1497743991291982, "grad_norm": 0.06248802473005766, "learning_rate": 0.00017841099330268134, "loss": 0.8513, "step": 65490 }, { "epoch": 1.1499499640091997, "grad_norm": 0.05080056984708842, "learning_rate": 0.0001784040462370271, "loss": 0.8516, "step": 65500 }, { "epoch": 1.150125528889201, "grad_norm": 0.06148073762187769, "learning_rate": 0.00017839709819064876, "loss": 0.8514, "step": 65510 }, { "epoch": 1.1503010937692024, "grad_norm": 0.10190667229300787, "learning_rate": 0.00017839014916363438, "loss": 0.8545, "step": 65520 }, { "epoch": 1.150476658649204, "grad_norm": 0.0594858067383562, "learning_rate": 0.00017838319915607195, "loss": 0.853, "step": 65530 }, { "epoch": 1.1506522235292052, "grad_norm": 0.08019502094252026, "learning_rate": 0.00017837624816804962, "loss": 0.8516, "step": 65540 }, { "epoch": 1.1508277884092066, "grad_norm": 0.053618058554678404, "learning_rate": 0.0001783692961996553, "loss": 0.8514, "step": 65550 }, { "epoch": 1.151003353289208, "grad_norm": 0.0822578792721255, "learning_rate": 0.0001783623432509772, "loss": 0.8556, "step": 65560 }, { "epoch": 1.1511789181692094, "grad_norm": 0.06767396284919129, "learning_rate": 0.00017835538932210341, "loss": 0.8487, "step": 65570 }, { "epoch": 1.1513544830492108, "grad_norm": 0.06978358682158159, "learning_rate": 0.000178348434413122, "loss": 0.8585, "step": 65580 }, { "epoch": 1.1515300479292123, "grad_norm": 0.047352717410492275, "learning_rate": 0.00017834147852412112, "loss": 0.856, "step": 65590 }, { "epoch": 1.1517056128092136, "grad_norm": 0.07250102885218605, "learning_rate": 0.00017833452165518884, "loss": 0.8493, "step": 65600 }, { "epoch": 1.151881177689215, "grad_norm": 0.047910922456476136, "learning_rate": 0.00017832756380641338, "loss": 0.8506, "step": 65610 }, { "epoch": 1.1520567425692165, "grad_norm": 0.0695945232496535, "learning_rate": 0.00017832060497788285, "loss": 0.8503, "step": 65620 }, { "epoch": 1.1522323074492178, "grad_norm": 0.1060707798693782, "learning_rate": 0.00017831364516968546, "loss": 0.8537, "step": 65630 }, { "epoch": 1.1524078723292193, "grad_norm": 0.07219199352828064, "learning_rate": 0.00017830668438190936, "loss": 0.8521, "step": 65640 }, { "epoch": 1.1525834372092207, "grad_norm": 0.09840239923815636, "learning_rate": 0.00017829972261464277, "loss": 0.8484, "step": 65650 }, { "epoch": 1.152759002089222, "grad_norm": 0.06936060986343986, "learning_rate": 0.00017829275986797383, "loss": 0.8454, "step": 65660 }, { "epoch": 1.1529345669692235, "grad_norm": 0.08478936690582792, "learning_rate": 0.00017828579614199083, "loss": 0.8451, "step": 65670 }, { "epoch": 1.153110131849225, "grad_norm": 0.07468908332686115, "learning_rate": 0.00017827883143678203, "loss": 0.8453, "step": 65680 }, { "epoch": 1.1532856967292262, "grad_norm": 0.04748669721207155, "learning_rate": 0.0001782718657524356, "loss": 0.8585, "step": 65690 }, { "epoch": 1.1534612616092277, "grad_norm": 0.050998189568138036, "learning_rate": 0.0001782648990890398, "loss": 0.8502, "step": 65700 }, { "epoch": 1.1536368264892292, "grad_norm": 0.049289143700438146, "learning_rate": 0.0001782579314466829, "loss": 0.8564, "step": 65710 }, { "epoch": 1.1538123913692304, "grad_norm": 0.05417514148235377, "learning_rate": 0.00017825096282545326, "loss": 0.8517, "step": 65720 }, { "epoch": 1.153987956249232, "grad_norm": 0.04615473636225306, "learning_rate": 0.0001782439932254391, "loss": 0.8402, "step": 65730 }, { "epoch": 1.1541635211292334, "grad_norm": 0.0691557497355119, "learning_rate": 0.0001782370226467287, "loss": 0.8465, "step": 65740 }, { "epoch": 1.1543390860092346, "grad_norm": 0.06496068070205695, "learning_rate": 0.00017823005108941043, "loss": 0.8564, "step": 65750 }, { "epoch": 1.154514650889236, "grad_norm": 0.05712487144101239, "learning_rate": 0.00017822307855357264, "loss": 0.8471, "step": 65760 }, { "epoch": 1.1546902157692376, "grad_norm": 0.06829034032628047, "learning_rate": 0.00017821610503930367, "loss": 0.8555, "step": 65770 }, { "epoch": 1.1548657806492388, "grad_norm": 0.055511230804936224, "learning_rate": 0.00017820913054669178, "loss": 0.8515, "step": 65780 }, { "epoch": 1.1550413455292403, "grad_norm": 0.050788449536395844, "learning_rate": 0.00017820215507582547, "loss": 0.8576, "step": 65790 }, { "epoch": 1.1552169104092418, "grad_norm": 0.0536047414480835, "learning_rate": 0.00017819517862679304, "loss": 0.8509, "step": 65800 }, { "epoch": 1.155392475289243, "grad_norm": 0.0709795705458288, "learning_rate": 0.00017818820119968289, "loss": 0.8533, "step": 65810 }, { "epoch": 1.1555680401692445, "grad_norm": 0.06281005076658071, "learning_rate": 0.00017818122279458348, "loss": 0.854, "step": 65820 }, { "epoch": 1.155743605049246, "grad_norm": 0.04555517320932309, "learning_rate": 0.00017817424341158312, "loss": 0.851, "step": 65830 }, { "epoch": 1.1559191699292473, "grad_norm": 0.0810383318268404, "learning_rate": 0.00017816726305077034, "loss": 0.8557, "step": 65840 }, { "epoch": 1.1560947348092487, "grad_norm": 0.08307556689303361, "learning_rate": 0.00017816028171223355, "loss": 0.84, "step": 65850 }, { "epoch": 1.1562702996892502, "grad_norm": 0.05552104203818568, "learning_rate": 0.00017815329939606118, "loss": 0.8517, "step": 65860 }, { "epoch": 1.1564458645692515, "grad_norm": 0.05480649367305328, "learning_rate": 0.00017814631610234174, "loss": 0.8509, "step": 65870 }, { "epoch": 1.156621429449253, "grad_norm": 0.05110836644689362, "learning_rate": 0.00017813933183116369, "loss": 0.8436, "step": 65880 }, { "epoch": 1.1567969943292544, "grad_norm": 0.08732043452682244, "learning_rate": 0.00017813234658261553, "loss": 0.8468, "step": 65890 }, { "epoch": 1.156972559209256, "grad_norm": 0.05578635056387005, "learning_rate": 0.00017812536035678573, "loss": 0.8477, "step": 65900 }, { "epoch": 1.1571481240892572, "grad_norm": 0.07419652654532265, "learning_rate": 0.00017811837315376289, "loss": 0.8459, "step": 65910 }, { "epoch": 1.1573236889692586, "grad_norm": 0.06657074637611524, "learning_rate": 0.0001781113849736354, "loss": 0.8524, "step": 65920 }, { "epoch": 1.15749925384926, "grad_norm": 0.05046268180693899, "learning_rate": 0.00017810439581649197, "loss": 0.8513, "step": 65930 }, { "epoch": 1.1576748187292614, "grad_norm": 0.05280512437828936, "learning_rate": 0.000178097405682421, "loss": 0.8467, "step": 65940 }, { "epoch": 1.1578503836092628, "grad_norm": 0.07973949213806192, "learning_rate": 0.00017809041457151118, "loss": 0.848, "step": 65950 }, { "epoch": 1.1580259484892643, "grad_norm": 0.05181168283969551, "learning_rate": 0.00017808342248385096, "loss": 0.8508, "step": 65960 }, { "epoch": 1.1582015133692656, "grad_norm": 0.058440511341605474, "learning_rate": 0.00017807642941952906, "loss": 0.8477, "step": 65970 }, { "epoch": 1.158377078249267, "grad_norm": 0.050259706533456225, "learning_rate": 0.00017806943537863404, "loss": 0.8508, "step": 65980 }, { "epoch": 1.1585526431292683, "grad_norm": 0.05099665276078354, "learning_rate": 0.00017806244036125442, "loss": 0.8544, "step": 65990 }, { "epoch": 1.1587282080092698, "grad_norm": 0.05699468543264203, "learning_rate": 0.000178055444367479, "loss": 0.8511, "step": 66000 }, { "epoch": 1.1589037728892713, "grad_norm": 0.0698138730295014, "learning_rate": 0.00017804844739739633, "loss": 0.8498, "step": 66010 }, { "epoch": 1.1590793377692727, "grad_norm": 0.05385141185150396, "learning_rate": 0.000178041449451095, "loss": 0.8435, "step": 66020 }, { "epoch": 1.159254902649274, "grad_norm": 0.07038408940865239, "learning_rate": 0.00017803445052866378, "loss": 0.8526, "step": 66030 }, { "epoch": 1.1594304675292755, "grad_norm": 0.08329939866489247, "learning_rate": 0.00017802745063019132, "loss": 0.8443, "step": 66040 }, { "epoch": 1.1596060324092767, "grad_norm": 0.058533176732972744, "learning_rate": 0.00017802044975576627, "loss": 0.8574, "step": 66050 }, { "epoch": 1.1597815972892782, "grad_norm": 0.07290921981180094, "learning_rate": 0.0001780134479054774, "loss": 0.8578, "step": 66060 }, { "epoch": 1.1599571621692797, "grad_norm": 0.05808508967732347, "learning_rate": 0.00017800644507941332, "loss": 0.848, "step": 66070 }, { "epoch": 1.1601327270492812, "grad_norm": 0.059870009436683705, "learning_rate": 0.00017799944127766288, "loss": 0.8526, "step": 66080 }, { "epoch": 1.1603082919292824, "grad_norm": 0.04698494323136607, "learning_rate": 0.00017799243650031476, "loss": 0.8501, "step": 66090 }, { "epoch": 1.160483856809284, "grad_norm": 0.05934899117823208, "learning_rate": 0.0001779854307474577, "loss": 0.8492, "step": 66100 }, { "epoch": 1.1606594216892852, "grad_norm": 0.07849186512066335, "learning_rate": 0.00017797842401918047, "loss": 0.841, "step": 66110 }, { "epoch": 1.1608349865692866, "grad_norm": 0.06258717135854691, "learning_rate": 0.00017797141631557188, "loss": 0.8546, "step": 66120 }, { "epoch": 1.161010551449288, "grad_norm": 0.06747446849566724, "learning_rate": 0.00017796440763672065, "loss": 0.8428, "step": 66130 }, { "epoch": 1.1611861163292896, "grad_norm": 0.051314013831164344, "learning_rate": 0.00017795739798271568, "loss": 0.8492, "step": 66140 }, { "epoch": 1.1613616812092908, "grad_norm": 0.058800644165707794, "learning_rate": 0.00017795038735364568, "loss": 0.8565, "step": 66150 }, { "epoch": 1.1615372460892923, "grad_norm": 0.058886693332433106, "learning_rate": 0.00017794337574959953, "loss": 0.8485, "step": 66160 }, { "epoch": 1.1617128109692938, "grad_norm": 0.07156118471696717, "learning_rate": 0.00017793636317066608, "loss": 0.8546, "step": 66170 }, { "epoch": 1.161888375849295, "grad_norm": 0.07887879684927326, "learning_rate": 0.00017792934961693418, "loss": 0.8449, "step": 66180 }, { "epoch": 1.1620639407292965, "grad_norm": 0.07085384397289655, "learning_rate": 0.0001779223350884927, "loss": 0.8529, "step": 66190 }, { "epoch": 1.162239505609298, "grad_norm": 0.06854932196505469, "learning_rate": 0.00017791531958543042, "loss": 0.8564, "step": 66200 }, { "epoch": 1.1624150704892993, "grad_norm": 0.054056520369489686, "learning_rate": 0.0001779083031078363, "loss": 0.8536, "step": 66210 }, { "epoch": 1.1625906353693007, "grad_norm": 0.08141182030914039, "learning_rate": 0.00017790128565579928, "loss": 0.8606, "step": 66220 }, { "epoch": 1.1627662002493022, "grad_norm": 0.08261423208465404, "learning_rate": 0.0001778942672294082, "loss": 0.8457, "step": 66230 }, { "epoch": 1.1629417651293035, "grad_norm": 0.0553588875672764, "learning_rate": 0.00017788724782875207, "loss": 0.8554, "step": 66240 }, { "epoch": 1.163117330009305, "grad_norm": 0.07902314068790685, "learning_rate": 0.00017788022745391974, "loss": 0.8574, "step": 66250 }, { "epoch": 1.1632928948893064, "grad_norm": 0.08599203113786288, "learning_rate": 0.0001778732061050002, "loss": 0.8519, "step": 66260 }, { "epoch": 1.1634684597693077, "grad_norm": 0.06829452050201011, "learning_rate": 0.0001778661837820824, "loss": 0.8512, "step": 66270 }, { "epoch": 1.1636440246493092, "grad_norm": 0.049757628693316056, "learning_rate": 0.00017785916048525534, "loss": 0.8506, "step": 66280 }, { "epoch": 1.1638195895293106, "grad_norm": 0.0623088587243796, "learning_rate": 0.00017785213621460794, "loss": 0.861, "step": 66290 }, { "epoch": 1.163995154409312, "grad_norm": 0.06929691263471661, "learning_rate": 0.00017784511097022928, "loss": 0.8415, "step": 66300 }, { "epoch": 1.1641707192893134, "grad_norm": 0.05268067304510227, "learning_rate": 0.00017783808475220833, "loss": 0.8448, "step": 66310 }, { "epoch": 1.1643462841693148, "grad_norm": 0.0522750847220847, "learning_rate": 0.00017783105756063414, "loss": 0.8472, "step": 66320 }, { "epoch": 1.164521849049316, "grad_norm": 0.05872613315890544, "learning_rate": 0.0001778240293955957, "loss": 0.8546, "step": 66330 }, { "epoch": 1.1646974139293176, "grad_norm": 0.07097497617153894, "learning_rate": 0.00017781700025718212, "loss": 0.8409, "step": 66340 }, { "epoch": 1.164872978809319, "grad_norm": 0.05617131045825325, "learning_rate": 0.00017780997014548239, "loss": 0.8528, "step": 66350 }, { "epoch": 1.1650485436893203, "grad_norm": 0.07083493870573336, "learning_rate": 0.00017780293906058563, "loss": 0.8517, "step": 66360 }, { "epoch": 1.1652241085693218, "grad_norm": 0.05346148718359194, "learning_rate": 0.0001777959070025809, "loss": 0.8524, "step": 66370 }, { "epoch": 1.1653996734493233, "grad_norm": 0.056057092996650654, "learning_rate": 0.00017778887397155733, "loss": 0.8517, "step": 66380 }, { "epoch": 1.1655752383293245, "grad_norm": 0.08400363844570798, "learning_rate": 0.000177781839967604, "loss": 0.8494, "step": 66390 }, { "epoch": 1.165750803209326, "grad_norm": 0.06467932096781472, "learning_rate": 0.00017777480499081002, "loss": 0.8523, "step": 66400 }, { "epoch": 1.1659263680893275, "grad_norm": 0.05664101239783248, "learning_rate": 0.00017776776904126462, "loss": 0.8458, "step": 66410 }, { "epoch": 1.1661019329693287, "grad_norm": 0.05131260601378193, "learning_rate": 0.00017776073211905682, "loss": 0.8483, "step": 66420 }, { "epoch": 1.1662774978493302, "grad_norm": 0.05501310571743309, "learning_rate": 0.0001777536942242758, "loss": 0.8517, "step": 66430 }, { "epoch": 1.1664530627293317, "grad_norm": 0.069582700508036, "learning_rate": 0.00017774665535701085, "loss": 0.8446, "step": 66440 }, { "epoch": 1.166628627609333, "grad_norm": 0.05685396782716212, "learning_rate": 0.00017773961551735098, "loss": 0.8402, "step": 66450 }, { "epoch": 1.1668041924893344, "grad_norm": 0.05284535576380621, "learning_rate": 0.00017773257470538552, "loss": 0.8564, "step": 66460 }, { "epoch": 1.166979757369336, "grad_norm": 0.061397146236154415, "learning_rate": 0.00017772553292120364, "loss": 0.854, "step": 66470 }, { "epoch": 1.1671553222493372, "grad_norm": 0.052443696613391946, "learning_rate": 0.00017771849016489454, "loss": 0.8501, "step": 66480 }, { "epoch": 1.1673308871293386, "grad_norm": 0.059688754470641175, "learning_rate": 0.00017771144643654747, "loss": 0.8445, "step": 66490 }, { "epoch": 1.1675064520093401, "grad_norm": 0.08220822033179061, "learning_rate": 0.00017770440173625165, "loss": 0.8458, "step": 66500 }, { "epoch": 1.1676820168893414, "grad_norm": 0.0519389050332011, "learning_rate": 0.00017769735606409638, "loss": 0.8458, "step": 66510 }, { "epoch": 1.1678575817693428, "grad_norm": 0.05648143183694809, "learning_rate": 0.00017769030942017092, "loss": 0.8464, "step": 66520 }, { "epoch": 1.1680331466493443, "grad_norm": 0.05325645438707554, "learning_rate": 0.0001776832618045645, "loss": 0.8505, "step": 66530 }, { "epoch": 1.1682087115293456, "grad_norm": 0.056350257247781615, "learning_rate": 0.00017767621321736652, "loss": 0.8452, "step": 66540 }, { "epoch": 1.168384276409347, "grad_norm": 0.05845730886864941, "learning_rate": 0.0001776691636586662, "loss": 0.8533, "step": 66550 }, { "epoch": 1.1685598412893485, "grad_norm": 0.060069070624068265, "learning_rate": 0.00017766211312855287, "loss": 0.8505, "step": 66560 }, { "epoch": 1.1687354061693498, "grad_norm": 0.06386736850337466, "learning_rate": 0.0001776550616271159, "loss": 0.848, "step": 66570 }, { "epoch": 1.1689109710493513, "grad_norm": 0.0720697295772668, "learning_rate": 0.00017764800915444457, "loss": 0.8531, "step": 66580 }, { "epoch": 1.1690865359293527, "grad_norm": 0.04392984298939825, "learning_rate": 0.00017764095571062827, "loss": 0.8564, "step": 66590 }, { "epoch": 1.169262100809354, "grad_norm": 0.05599968607601634, "learning_rate": 0.00017763390129575638, "loss": 0.8495, "step": 66600 }, { "epoch": 1.1694376656893555, "grad_norm": 0.0772421089477914, "learning_rate": 0.0001776268459099183, "loss": 0.8518, "step": 66610 }, { "epoch": 1.169613230569357, "grad_norm": 0.0629693221886483, "learning_rate": 0.0001776197895532034, "loss": 0.8516, "step": 66620 }, { "epoch": 1.1697887954493584, "grad_norm": 0.06585383553311876, "learning_rate": 0.00017761273222570106, "loss": 0.8506, "step": 66630 }, { "epoch": 1.1699643603293597, "grad_norm": 0.0710670454714907, "learning_rate": 0.00017760567392750075, "loss": 0.8467, "step": 66640 }, { "epoch": 1.1701399252093612, "grad_norm": 0.0638130130135592, "learning_rate": 0.0001775986146586918, "loss": 0.8528, "step": 66650 }, { "epoch": 1.1703154900893624, "grad_norm": 0.09690107760646788, "learning_rate": 0.0001775915544193638, "loss": 0.8562, "step": 66660 }, { "epoch": 1.170491054969364, "grad_norm": 0.05865057129415335, "learning_rate": 0.0001775844932096061, "loss": 0.8518, "step": 66670 }, { "epoch": 1.1706666198493654, "grad_norm": 0.04545844160917943, "learning_rate": 0.0001775774310295082, "loss": 0.8567, "step": 66680 }, { "epoch": 1.1708421847293669, "grad_norm": 0.05590429581796855, "learning_rate": 0.00017757036787915953, "loss": 0.8587, "step": 66690 }, { "epoch": 1.171017749609368, "grad_norm": 0.06109372377954633, "learning_rate": 0.00017756330375864966, "loss": 0.8435, "step": 66700 }, { "epoch": 1.1711933144893696, "grad_norm": 0.05507628020747419, "learning_rate": 0.00017755623866806805, "loss": 0.8513, "step": 66710 }, { "epoch": 1.1713688793693708, "grad_norm": 0.05216008454888361, "learning_rate": 0.00017754917260750423, "loss": 0.8536, "step": 66720 }, { "epoch": 1.1715444442493723, "grad_norm": 0.0562614898103783, "learning_rate": 0.00017754210557704774, "loss": 0.852, "step": 66730 }, { "epoch": 1.1717200091293738, "grad_norm": 0.0549727426136307, "learning_rate": 0.00017753503757678808, "loss": 0.8528, "step": 66740 }, { "epoch": 1.1718955740093753, "grad_norm": 0.0505680109617179, "learning_rate": 0.00017752796860681481, "loss": 0.8394, "step": 66750 }, { "epoch": 1.1720711388893765, "grad_norm": 0.0548838783419539, "learning_rate": 0.00017752089866721754, "loss": 0.8573, "step": 66760 }, { "epoch": 1.172246703769378, "grad_norm": 0.053355479053723814, "learning_rate": 0.00017751382775808584, "loss": 0.8556, "step": 66770 }, { "epoch": 1.1724222686493793, "grad_norm": 0.07008800785900905, "learning_rate": 0.00017750675587950926, "loss": 0.8492, "step": 66780 }, { "epoch": 1.1725978335293807, "grad_norm": 0.048221391784391415, "learning_rate": 0.00017749968303157739, "loss": 0.8544, "step": 66790 }, { "epoch": 1.1727733984093822, "grad_norm": 0.06091881945858441, "learning_rate": 0.00017749260921437989, "loss": 0.8494, "step": 66800 }, { "epoch": 1.1729489632893837, "grad_norm": 0.05747206685628641, "learning_rate": 0.00017748553442800636, "loss": 0.8454, "step": 66810 }, { "epoch": 1.173124528169385, "grad_norm": 0.045378190164994954, "learning_rate": 0.00017747845867254646, "loss": 0.8483, "step": 66820 }, { "epoch": 1.1733000930493864, "grad_norm": 0.06517101850911525, "learning_rate": 0.00017747138194808986, "loss": 0.8499, "step": 66830 }, { "epoch": 1.1734756579293877, "grad_norm": 0.05041206547529954, "learning_rate": 0.00017746430425472618, "loss": 0.8582, "step": 66840 }, { "epoch": 1.1736512228093892, "grad_norm": 0.05722215125248116, "learning_rate": 0.00017745722559254508, "loss": 0.8435, "step": 66850 }, { "epoch": 1.1738267876893906, "grad_norm": 0.0733420529107375, "learning_rate": 0.00017745014596163628, "loss": 0.8459, "step": 66860 }, { "epoch": 1.1740023525693921, "grad_norm": 0.06847855518168443, "learning_rate": 0.00017744306536208952, "loss": 0.8486, "step": 66870 }, { "epoch": 1.1741779174493934, "grad_norm": 0.07691223758555013, "learning_rate": 0.00017743598379399447, "loss": 0.8544, "step": 66880 }, { "epoch": 1.1743534823293948, "grad_norm": 0.05823793730715337, "learning_rate": 0.00017742890125744084, "loss": 0.8554, "step": 66890 }, { "epoch": 1.1745290472093963, "grad_norm": 0.048486578242918715, "learning_rate": 0.00017742181775251838, "loss": 0.8518, "step": 66900 }, { "epoch": 1.1747046120893976, "grad_norm": 0.057293387698345175, "learning_rate": 0.00017741473327931686, "loss": 0.8545, "step": 66910 }, { "epoch": 1.174880176969399, "grad_norm": 0.07070590777651231, "learning_rate": 0.000177407647837926, "loss": 0.8462, "step": 66920 }, { "epoch": 1.1750557418494005, "grad_norm": 0.06005307381297477, "learning_rate": 0.0001774005614284356, "loss": 0.8552, "step": 66930 }, { "epoch": 1.1752313067294018, "grad_norm": 0.07663227961891576, "learning_rate": 0.00017739347405093547, "loss": 0.843, "step": 66940 }, { "epoch": 1.1754068716094033, "grad_norm": 0.05906012681473801, "learning_rate": 0.00017738638570551532, "loss": 0.8481, "step": 66950 }, { "epoch": 1.1755824364894047, "grad_norm": 0.05788251094674904, "learning_rate": 0.00017737929639226508, "loss": 0.8465, "step": 66960 }, { "epoch": 1.175758001369406, "grad_norm": 0.051338629802013265, "learning_rate": 0.00017737220611127453, "loss": 0.8567, "step": 66970 }, { "epoch": 1.1759335662494075, "grad_norm": 0.06972162911564192, "learning_rate": 0.00017736511486263348, "loss": 0.8495, "step": 66980 }, { "epoch": 1.176109131129409, "grad_norm": 0.05857337274284438, "learning_rate": 0.00017735802264643179, "loss": 0.8431, "step": 66990 }, { "epoch": 1.1762846960094102, "grad_norm": 0.06967364828865967, "learning_rate": 0.00017735092946275928, "loss": 0.8449, "step": 67000 }, { "epoch": 1.1764602608894117, "grad_norm": 0.06054159508014688, "learning_rate": 0.0001773438353117059, "loss": 0.8476, "step": 67010 }, { "epoch": 1.1766358257694132, "grad_norm": 0.06445257492235898, "learning_rate": 0.0001773367401933615, "loss": 0.841, "step": 67020 }, { "epoch": 1.1768113906494144, "grad_norm": 0.060582631884227245, "learning_rate": 0.00017732964410781593, "loss": 0.8527, "step": 67030 }, { "epoch": 1.176986955529416, "grad_norm": 0.06988296285013493, "learning_rate": 0.0001773225470551592, "loss": 0.8605, "step": 67040 }, { "epoch": 1.1771625204094174, "grad_norm": 0.07944978705916177, "learning_rate": 0.00017731544903548113, "loss": 0.8529, "step": 67050 }, { "epoch": 1.1773380852894186, "grad_norm": 0.05961790251007727, "learning_rate": 0.00017730835004887166, "loss": 0.8579, "step": 67060 }, { "epoch": 1.1775136501694201, "grad_norm": 0.06467193746131072, "learning_rate": 0.00017730125009542084, "loss": 0.8475, "step": 67070 }, { "epoch": 1.1776892150494216, "grad_norm": 0.06944216356391178, "learning_rate": 0.00017729414917521852, "loss": 0.8485, "step": 67080 }, { "epoch": 1.1778647799294228, "grad_norm": 0.07832731944921628, "learning_rate": 0.00017728704728835472, "loss": 0.8502, "step": 67090 }, { "epoch": 1.1780403448094243, "grad_norm": 0.07516385358907642, "learning_rate": 0.00017727994443491942, "loss": 0.8444, "step": 67100 }, { "epoch": 1.1782159096894258, "grad_norm": 0.06383906357490506, "learning_rate": 0.0001772728406150026, "loss": 0.854, "step": 67110 }, { "epoch": 1.178391474569427, "grad_norm": 0.08534104672068801, "learning_rate": 0.00017726573582869425, "loss": 0.8538, "step": 67120 }, { "epoch": 1.1785670394494285, "grad_norm": 0.06341679099461608, "learning_rate": 0.00017725863007608445, "loss": 0.8542, "step": 67130 }, { "epoch": 1.17874260432943, "grad_norm": 0.06538292195167111, "learning_rate": 0.00017725152335726314, "loss": 0.8563, "step": 67140 }, { "epoch": 1.1789181692094313, "grad_norm": 0.0628232792343453, "learning_rate": 0.00017724441567232047, "loss": 0.8534, "step": 67150 }, { "epoch": 1.1790937340894327, "grad_norm": 0.05421107862893153, "learning_rate": 0.00017723730702134643, "loss": 0.8523, "step": 67160 }, { "epoch": 1.1792692989694342, "grad_norm": 0.06375443848076065, "learning_rate": 0.00017723019740443105, "loss": 0.8529, "step": 67170 }, { "epoch": 1.1794448638494355, "grad_norm": 0.04716519736196937, "learning_rate": 0.0001772230868216645, "loss": 0.8453, "step": 67180 }, { "epoch": 1.179620428729437, "grad_norm": 0.07236200986764063, "learning_rate": 0.00017721597527313684, "loss": 0.8496, "step": 67190 }, { "epoch": 1.1797959936094384, "grad_norm": 0.06877261101755047, "learning_rate": 0.00017720886275893815, "loss": 0.8573, "step": 67200 }, { "epoch": 1.1799715584894397, "grad_norm": 0.06251214223254317, "learning_rate": 0.0001772017492791586, "loss": 0.8512, "step": 67210 }, { "epoch": 1.1801471233694412, "grad_norm": 0.06175735608481265, "learning_rate": 0.00017719463483388824, "loss": 0.8437, "step": 67220 }, { "epoch": 1.1803226882494426, "grad_norm": 0.05800718277563114, "learning_rate": 0.00017718751942321726, "loss": 0.8479, "step": 67230 }, { "epoch": 1.180498253129444, "grad_norm": 0.058780250262052934, "learning_rate": 0.0001771804030472358, "loss": 0.8454, "step": 67240 }, { "epoch": 1.1806738180094454, "grad_norm": 0.08157186702704701, "learning_rate": 0.00017717328570603403, "loss": 0.8569, "step": 67250 }, { "epoch": 1.1808493828894469, "grad_norm": 0.07622885696536895, "learning_rate": 0.00017716616739970215, "loss": 0.8564, "step": 67260 }, { "epoch": 1.181024947769448, "grad_norm": 0.05343993142616904, "learning_rate": 0.00017715904812833034, "loss": 0.8492, "step": 67270 }, { "epoch": 1.1812005126494496, "grad_norm": 0.08713444112906572, "learning_rate": 0.0001771519278920088, "loss": 0.8509, "step": 67280 }, { "epoch": 1.181376077529451, "grad_norm": 0.06361466937338843, "learning_rate": 0.00017714480669082773, "loss": 0.8476, "step": 67290 }, { "epoch": 1.1815516424094523, "grad_norm": 0.053540267851679105, "learning_rate": 0.00017713768452487733, "loss": 0.8449, "step": 67300 }, { "epoch": 1.1817272072894538, "grad_norm": 0.06492100982754472, "learning_rate": 0.00017713056139424788, "loss": 0.8417, "step": 67310 }, { "epoch": 1.1819027721694553, "grad_norm": 0.04865320494819983, "learning_rate": 0.00017712343729902967, "loss": 0.8558, "step": 67320 }, { "epoch": 1.1820783370494565, "grad_norm": 0.07936294082476478, "learning_rate": 0.0001771163122393129, "loss": 0.8531, "step": 67330 }, { "epoch": 1.182253901929458, "grad_norm": 0.04717877527097928, "learning_rate": 0.00017710918621518788, "loss": 0.8596, "step": 67340 }, { "epoch": 1.1824294668094595, "grad_norm": 0.10725122468949852, "learning_rate": 0.00017710205922674485, "loss": 0.8558, "step": 67350 }, { "epoch": 1.182605031689461, "grad_norm": 0.0642563111647284, "learning_rate": 0.00017709493127407417, "loss": 0.8367, "step": 67360 }, { "epoch": 1.1827805965694622, "grad_norm": 0.07379139757525217, "learning_rate": 0.0001770878023572661, "loss": 0.8501, "step": 67370 }, { "epoch": 1.1829561614494637, "grad_norm": 0.06992606703006038, "learning_rate": 0.00017708067247641104, "loss": 0.8514, "step": 67380 }, { "epoch": 1.183131726329465, "grad_norm": 0.052770859167486577, "learning_rate": 0.00017707354163159925, "loss": 0.8468, "step": 67390 }, { "epoch": 1.1833072912094664, "grad_norm": 0.0808574473202574, "learning_rate": 0.00017706640982292113, "loss": 0.855, "step": 67400 }, { "epoch": 1.183482856089468, "grad_norm": 0.05162558743641875, "learning_rate": 0.000177059277050467, "loss": 0.8522, "step": 67410 }, { "epoch": 1.1836584209694694, "grad_norm": 0.06706585968071539, "learning_rate": 0.00017705214331432725, "loss": 0.8469, "step": 67420 }, { "epoch": 1.1838339858494706, "grad_norm": 0.06167762678608576, "learning_rate": 0.00017704500861459224, "loss": 0.8522, "step": 67430 }, { "epoch": 1.1840095507294721, "grad_norm": 0.05797151475000293, "learning_rate": 0.00017703787295135249, "loss": 0.8524, "step": 67440 }, { "epoch": 1.1841851156094734, "grad_norm": 0.05374671171746175, "learning_rate": 0.00017703073632469824, "loss": 0.8442, "step": 67450 }, { "epoch": 1.1843606804894748, "grad_norm": 0.07224151683310609, "learning_rate": 0.00017702359873472, "loss": 0.8572, "step": 67460 }, { "epoch": 1.1845362453694763, "grad_norm": 0.06220874026731289, "learning_rate": 0.00017701646018150822, "loss": 0.8638, "step": 67470 }, { "epoch": 1.1847118102494778, "grad_norm": 0.05621311471394867, "learning_rate": 0.00017700932066515328, "loss": 0.8573, "step": 67480 }, { "epoch": 1.184887375129479, "grad_norm": 0.07036596814669407, "learning_rate": 0.00017700218018574572, "loss": 0.845, "step": 67490 }, { "epoch": 1.1850629400094805, "grad_norm": 0.0595722105541128, "learning_rate": 0.00017699503874337596, "loss": 0.8456, "step": 67500 }, { "epoch": 1.1852385048894818, "grad_norm": 0.05647392717426963, "learning_rate": 0.0001769878963381345, "loss": 0.8511, "step": 67510 }, { "epoch": 1.1854140697694833, "grad_norm": 0.059567274184094056, "learning_rate": 0.0001769807529701118, "loss": 0.8528, "step": 67520 }, { "epoch": 1.1855896346494847, "grad_norm": 0.08766537573539245, "learning_rate": 0.0001769736086393984, "loss": 0.8515, "step": 67530 }, { "epoch": 1.1857651995294862, "grad_norm": 0.0698475038501547, "learning_rate": 0.00017696646334608486, "loss": 0.8486, "step": 67540 }, { "epoch": 1.1859407644094875, "grad_norm": 0.05508670525679419, "learning_rate": 0.00017695931709026165, "loss": 0.8506, "step": 67550 }, { "epoch": 1.186116329289489, "grad_norm": 0.06242413879525068, "learning_rate": 0.00017695216987201933, "loss": 0.8509, "step": 67560 }, { "epoch": 1.1862918941694902, "grad_norm": 0.0712810759802949, "learning_rate": 0.00017694502169144846, "loss": 0.8461, "step": 67570 }, { "epoch": 1.1864674590494917, "grad_norm": 0.06520369575885583, "learning_rate": 0.00017693787254863963, "loss": 0.8538, "step": 67580 }, { "epoch": 1.1866430239294932, "grad_norm": 0.05920091389444902, "learning_rate": 0.00017693072244368336, "loss": 0.847, "step": 67590 }, { "epoch": 1.1868185888094946, "grad_norm": 0.07630954809771702, "learning_rate": 0.00017692357137667032, "loss": 0.8507, "step": 67600 }, { "epoch": 1.186994153689496, "grad_norm": 0.07928604110414293, "learning_rate": 0.00017691641934769106, "loss": 0.8475, "step": 67610 }, { "epoch": 1.1871697185694974, "grad_norm": 0.059202416191917816, "learning_rate": 0.00017690926635683621, "loss": 0.8429, "step": 67620 }, { "epoch": 1.1873452834494989, "grad_norm": 0.06616237685175655, "learning_rate": 0.00017690211240419643, "loss": 0.8494, "step": 67630 }, { "epoch": 1.1875208483295001, "grad_norm": 0.052689868497912094, "learning_rate": 0.00017689495748986233, "loss": 0.8595, "step": 67640 }, { "epoch": 1.1876964132095016, "grad_norm": 0.06444928198949626, "learning_rate": 0.00017688780161392457, "loss": 0.8604, "step": 67650 }, { "epoch": 1.187871978089503, "grad_norm": 0.08135219749830415, "learning_rate": 0.00017688064477647383, "loss": 0.846, "step": 67660 }, { "epoch": 1.1880475429695043, "grad_norm": 0.05757779110802372, "learning_rate": 0.00017687348697760077, "loss": 0.85, "step": 67670 }, { "epoch": 1.1882231078495058, "grad_norm": 0.0595017749324334, "learning_rate": 0.0001768663282173961, "loss": 0.8488, "step": 67680 }, { "epoch": 1.1883986727295073, "grad_norm": 0.05113542070396218, "learning_rate": 0.0001768591684959505, "loss": 0.85, "step": 67690 }, { "epoch": 1.1885742376095085, "grad_norm": 0.0620066009334156, "learning_rate": 0.00017685200781335472, "loss": 0.8503, "step": 67700 }, { "epoch": 1.18874980248951, "grad_norm": 0.05219265283804171, "learning_rate": 0.00017684484616969944, "loss": 0.8526, "step": 67710 }, { "epoch": 1.1889253673695115, "grad_norm": 0.05424207610501709, "learning_rate": 0.00017683768356507545, "loss": 0.8484, "step": 67720 }, { "epoch": 1.1891009322495127, "grad_norm": 0.05959350588437233, "learning_rate": 0.0001768305199995734, "loss": 0.856, "step": 67730 }, { "epoch": 1.1892764971295142, "grad_norm": 0.05064134188045007, "learning_rate": 0.00017682335547328424, "loss": 0.8556, "step": 67740 }, { "epoch": 1.1894520620095157, "grad_norm": 0.08141825406092594, "learning_rate": 0.00017681618998629855, "loss": 0.8574, "step": 67750 }, { "epoch": 1.189627626889517, "grad_norm": 0.052501959134430266, "learning_rate": 0.00017680902353870725, "loss": 0.8582, "step": 67760 }, { "epoch": 1.1898031917695184, "grad_norm": 0.050729500140934435, "learning_rate": 0.00017680185613060107, "loss": 0.8509, "step": 67770 }, { "epoch": 1.18997875664952, "grad_norm": 0.05037801347633384, "learning_rate": 0.00017679468776207082, "loss": 0.8525, "step": 67780 }, { "epoch": 1.1901543215295212, "grad_norm": 0.05183115904981923, "learning_rate": 0.0001767875184332074, "loss": 0.8556, "step": 67790 }, { "epoch": 1.1903298864095226, "grad_norm": 0.060697644816557816, "learning_rate": 0.00017678034814410157, "loss": 0.8542, "step": 67800 }, { "epoch": 1.1905054512895241, "grad_norm": 0.10894634938825079, "learning_rate": 0.0001767731768948442, "loss": 0.8509, "step": 67810 }, { "epoch": 1.1906810161695254, "grad_norm": 0.075088742111408, "learning_rate": 0.00017676600468552619, "loss": 0.8471, "step": 67820 }, { "epoch": 1.1908565810495269, "grad_norm": 0.08502479283124258, "learning_rate": 0.00017675883151623836, "loss": 0.8471, "step": 67830 }, { "epoch": 1.1910321459295283, "grad_norm": 0.05642416363633012, "learning_rate": 0.0001767516573870716, "loss": 0.8507, "step": 67840 }, { "epoch": 1.1912077108095296, "grad_norm": 0.05797392175268148, "learning_rate": 0.00017674448229811684, "loss": 0.8531, "step": 67850 }, { "epoch": 1.191383275689531, "grad_norm": 0.061552172028314235, "learning_rate": 0.00017673730624946497, "loss": 0.8558, "step": 67860 }, { "epoch": 1.1915588405695325, "grad_norm": 0.07336547511095161, "learning_rate": 0.0001767301292412069, "loss": 0.8495, "step": 67870 }, { "epoch": 1.1917344054495338, "grad_norm": 0.061323290069446285, "learning_rate": 0.0001767229512734336, "loss": 0.8523, "step": 67880 }, { "epoch": 1.1919099703295353, "grad_norm": 0.05896961088571825, "learning_rate": 0.000176715772346236, "loss": 0.8512, "step": 67890 }, { "epoch": 1.1920855352095368, "grad_norm": 0.04801453116647186, "learning_rate": 0.00017670859245970502, "loss": 0.8515, "step": 67900 }, { "epoch": 1.192261100089538, "grad_norm": 0.059949185107009195, "learning_rate": 0.0001767014116139317, "loss": 0.8504, "step": 67910 }, { "epoch": 1.1924366649695395, "grad_norm": 0.08926758502281133, "learning_rate": 0.000176694229809007, "loss": 0.8542, "step": 67920 }, { "epoch": 1.192612229849541, "grad_norm": 0.08101465792457696, "learning_rate": 0.00017668704704502185, "loss": 0.8479, "step": 67930 }, { "epoch": 1.1927877947295422, "grad_norm": 0.05654519803962655, "learning_rate": 0.00017667986332206732, "loss": 0.8502, "step": 67940 }, { "epoch": 1.1929633596095437, "grad_norm": 0.052927380340810015, "learning_rate": 0.00017667267864023445, "loss": 0.8575, "step": 67950 }, { "epoch": 1.1931389244895452, "grad_norm": 0.05991060120884993, "learning_rate": 0.00017666549299961423, "loss": 0.8505, "step": 67960 }, { "epoch": 1.1933144893695464, "grad_norm": 0.06226335979452559, "learning_rate": 0.0001766583064002977, "loss": 0.8414, "step": 67970 }, { "epoch": 1.193490054249548, "grad_norm": 0.05328483430086237, "learning_rate": 0.00017665111884237596, "loss": 0.8521, "step": 67980 }, { "epoch": 1.1936656191295494, "grad_norm": 0.05492369408633701, "learning_rate": 0.00017664393032593997, "loss": 0.8517, "step": 67990 }, { "epoch": 1.1938411840095506, "grad_norm": 0.060211674836663225, "learning_rate": 0.00017663674085108098, "loss": 0.8509, "step": 68000 }, { "epoch": 1.1940167488895521, "grad_norm": 0.06713325739527343, "learning_rate": 0.0001766295504178899, "loss": 0.8513, "step": 68010 }, { "epoch": 1.1941923137695536, "grad_norm": 0.07126774735205454, "learning_rate": 0.00017662235902645795, "loss": 0.8445, "step": 68020 }, { "epoch": 1.1943678786495548, "grad_norm": 0.06084211317499684, "learning_rate": 0.00017661516667687624, "loss": 0.8454, "step": 68030 }, { "epoch": 1.1945434435295563, "grad_norm": 0.04738165679386999, "learning_rate": 0.0001766079733692359, "loss": 0.8551, "step": 68040 }, { "epoch": 1.1947190084095578, "grad_norm": 0.08652389995039579, "learning_rate": 0.00017660077910362798, "loss": 0.8497, "step": 68050 }, { "epoch": 1.194894573289559, "grad_norm": 0.06427436917784148, "learning_rate": 0.00017659358388014376, "loss": 0.8506, "step": 68060 }, { "epoch": 1.1950701381695605, "grad_norm": 0.0581965061977424, "learning_rate": 0.0001765863876988743, "loss": 0.8536, "step": 68070 }, { "epoch": 1.195245703049562, "grad_norm": 0.06126236484107329, "learning_rate": 0.00017657919055991085, "loss": 0.8511, "step": 68080 }, { "epoch": 1.1954212679295635, "grad_norm": 0.07970029542741533, "learning_rate": 0.00017657199246334457, "loss": 0.849, "step": 68090 }, { "epoch": 1.1955968328095647, "grad_norm": 0.05417556330680199, "learning_rate": 0.00017656479340926663, "loss": 0.8584, "step": 68100 }, { "epoch": 1.1957723976895662, "grad_norm": 0.05900429878959442, "learning_rate": 0.00017655759339776832, "loss": 0.8542, "step": 68110 }, { "epoch": 1.1959479625695675, "grad_norm": 0.07054165013147308, "learning_rate": 0.0001765503924289408, "loss": 0.8494, "step": 68120 }, { "epoch": 1.196123527449569, "grad_norm": 0.06200845161570574, "learning_rate": 0.00017654319050287534, "loss": 0.862, "step": 68130 }, { "epoch": 1.1962990923295704, "grad_norm": 0.05370971021206481, "learning_rate": 0.00017653598761966318, "loss": 0.8454, "step": 68140 }, { "epoch": 1.196474657209572, "grad_norm": 0.05323392921959561, "learning_rate": 0.00017652878377939554, "loss": 0.8552, "step": 68150 }, { "epoch": 1.1966502220895732, "grad_norm": 0.054002722692070605, "learning_rate": 0.00017652157898216374, "loss": 0.8515, "step": 68160 }, { "epoch": 1.1968257869695746, "grad_norm": 0.07038399533168309, "learning_rate": 0.0001765143732280591, "loss": 0.8513, "step": 68170 }, { "epoch": 1.197001351849576, "grad_norm": 0.04416428959312981, "learning_rate": 0.00017650716651717286, "loss": 0.8424, "step": 68180 }, { "epoch": 1.1971769167295774, "grad_norm": 0.05559538231312577, "learning_rate": 0.00017649995884959637, "loss": 0.853, "step": 68190 }, { "epoch": 1.1973524816095789, "grad_norm": 0.05145914048051736, "learning_rate": 0.0001764927502254209, "loss": 0.847, "step": 68200 }, { "epoch": 1.1975280464895803, "grad_norm": 0.06211014738785306, "learning_rate": 0.00017648554064473784, "loss": 0.8545, "step": 68210 }, { "epoch": 1.1977036113695816, "grad_norm": 0.07459084554515544, "learning_rate": 0.0001764783301076385, "loss": 0.8536, "step": 68220 }, { "epoch": 1.197879176249583, "grad_norm": 0.0537059431786568, "learning_rate": 0.00017647111861421425, "loss": 0.8518, "step": 68230 }, { "epoch": 1.1980547411295843, "grad_norm": 0.05785505990875088, "learning_rate": 0.00017646390616455647, "loss": 0.8499, "step": 68240 }, { "epoch": 1.1982303060095858, "grad_norm": 0.06246533023340328, "learning_rate": 0.00017645669275875655, "loss": 0.8557, "step": 68250 }, { "epoch": 1.1984058708895873, "grad_norm": 0.05958125993282195, "learning_rate": 0.00017644947839690585, "loss": 0.8528, "step": 68260 }, { "epoch": 1.1985814357695888, "grad_norm": 0.05098095487199019, "learning_rate": 0.0001764422630790958, "loss": 0.8482, "step": 68270 }, { "epoch": 1.19875700064959, "grad_norm": 0.05291755069266746, "learning_rate": 0.0001764350468054178, "loss": 0.8481, "step": 68280 }, { "epoch": 1.1989325655295915, "grad_norm": 0.060165467055452324, "learning_rate": 0.00017642782957596337, "loss": 0.8498, "step": 68290 }, { "epoch": 1.1991081304095927, "grad_norm": 0.05686762719275186, "learning_rate": 0.00017642061139082381, "loss": 0.8518, "step": 68300 }, { "epoch": 1.1992836952895942, "grad_norm": 0.06483531609868058, "learning_rate": 0.00017641339225009066, "loss": 0.8541, "step": 68310 }, { "epoch": 1.1994592601695957, "grad_norm": 0.10581028368664525, "learning_rate": 0.00017640617215385543, "loss": 0.8498, "step": 68320 }, { "epoch": 1.1996348250495972, "grad_norm": 0.05621892980224658, "learning_rate": 0.00017639895110220947, "loss": 0.8554, "step": 68330 }, { "epoch": 1.1998103899295984, "grad_norm": 0.06533123213008375, "learning_rate": 0.0001763917290952444, "loss": 0.8538, "step": 68340 }, { "epoch": 1.1999859548096, "grad_norm": 0.05093549499353779, "learning_rate": 0.00017638450613305167, "loss": 0.8484, "step": 68350 }, { "epoch": 1.2001615196896014, "grad_norm": 0.06330282841375084, "learning_rate": 0.00017637728221572282, "loss": 0.8545, "step": 68360 }, { "epoch": 1.2003370845696026, "grad_norm": 0.05240208463101434, "learning_rate": 0.0001763700573433493, "loss": 0.8497, "step": 68370 }, { "epoch": 1.2005126494496041, "grad_norm": 0.06765163942644778, "learning_rate": 0.00017636283151602276, "loss": 0.8551, "step": 68380 }, { "epoch": 1.2006882143296056, "grad_norm": 0.057820337366809846, "learning_rate": 0.00017635560473383468, "loss": 0.853, "step": 68390 }, { "epoch": 1.2008637792096069, "grad_norm": 0.05864721140919555, "learning_rate": 0.00017634837699687664, "loss": 0.8397, "step": 68400 }, { "epoch": 1.2010393440896083, "grad_norm": 0.049423121354390184, "learning_rate": 0.00017634114830524024, "loss": 0.8525, "step": 68410 }, { "epoch": 1.2012149089696098, "grad_norm": 0.052339606757918246, "learning_rate": 0.00017633391865901703, "loss": 0.8503, "step": 68420 }, { "epoch": 1.201390473849611, "grad_norm": 0.08167167609397699, "learning_rate": 0.00017632668805829866, "loss": 0.8649, "step": 68430 }, { "epoch": 1.2015660387296125, "grad_norm": 0.08643592158088705, "learning_rate": 0.00017631945650317668, "loss": 0.8562, "step": 68440 }, { "epoch": 1.201741603609614, "grad_norm": 0.06307181493103463, "learning_rate": 0.0001763122239937428, "loss": 0.8508, "step": 68450 }, { "epoch": 1.2019171684896153, "grad_norm": 0.06508992987185001, "learning_rate": 0.00017630499053008857, "loss": 0.8502, "step": 68460 }, { "epoch": 1.2020927333696168, "grad_norm": 0.05472667063061287, "learning_rate": 0.0001762977561123057, "loss": 0.8463, "step": 68470 }, { "epoch": 1.2022682982496182, "grad_norm": 0.11361418310163382, "learning_rate": 0.00017629052074048581, "loss": 0.8527, "step": 68480 }, { "epoch": 1.2024438631296195, "grad_norm": 0.08261288138813609, "learning_rate": 0.00017628328441472062, "loss": 0.8524, "step": 68490 }, { "epoch": 1.202619428009621, "grad_norm": 0.06572393442417926, "learning_rate": 0.00017627604713510176, "loss": 0.8468, "step": 68500 }, { "epoch": 1.2027949928896224, "grad_norm": 0.0515639038572139, "learning_rate": 0.00017626880890172098, "loss": 0.848, "step": 68510 }, { "epoch": 1.2029705577696237, "grad_norm": 0.05899910414264789, "learning_rate": 0.00017626156971466993, "loss": 0.8432, "step": 68520 }, { "epoch": 1.2031461226496252, "grad_norm": 0.050252182457455385, "learning_rate": 0.0001762543295740404, "loss": 0.8416, "step": 68530 }, { "epoch": 1.2033216875296266, "grad_norm": 0.05361427005047982, "learning_rate": 0.0001762470884799241, "loss": 0.8432, "step": 68540 }, { "epoch": 1.203497252409628, "grad_norm": 0.06739472266624336, "learning_rate": 0.00017623984643241276, "loss": 0.8478, "step": 68550 }, { "epoch": 1.2036728172896294, "grad_norm": 0.07434965384391823, "learning_rate": 0.0001762326034315981, "loss": 0.8467, "step": 68560 }, { "epoch": 1.2038483821696309, "grad_norm": 0.0563125276162476, "learning_rate": 0.00017622535947757196, "loss": 0.8505, "step": 68570 }, { "epoch": 1.2040239470496321, "grad_norm": 0.07870919864403823, "learning_rate": 0.00017621811457042613, "loss": 0.8486, "step": 68580 }, { "epoch": 1.2041995119296336, "grad_norm": 0.055055760193395524, "learning_rate": 0.00017621086871025236, "loss": 0.8355, "step": 68590 }, { "epoch": 1.204375076809635, "grad_norm": 0.057501993248541355, "learning_rate": 0.00017620362189714247, "loss": 0.8515, "step": 68600 }, { "epoch": 1.2045506416896363, "grad_norm": 0.05400975171499392, "learning_rate": 0.0001761963741311883, "loss": 0.859, "step": 68610 }, { "epoch": 1.2047262065696378, "grad_norm": 0.06067898401756758, "learning_rate": 0.0001761891254124816, "loss": 0.8535, "step": 68620 }, { "epoch": 1.2049017714496393, "grad_norm": 0.06677549879473428, "learning_rate": 0.0001761818757411143, "loss": 0.8538, "step": 68630 }, { "epoch": 1.2050773363296405, "grad_norm": 0.04881873623776551, "learning_rate": 0.00017617462511717823, "loss": 0.8581, "step": 68640 }, { "epoch": 1.205252901209642, "grad_norm": 0.10137700518661401, "learning_rate": 0.00017616737354076524, "loss": 0.8499, "step": 68650 }, { "epoch": 1.2054284660896435, "grad_norm": 0.06607282623756078, "learning_rate": 0.00017616012101196723, "loss": 0.8408, "step": 68660 }, { "epoch": 1.2056040309696447, "grad_norm": 0.06373843028143356, "learning_rate": 0.0001761528675308761, "loss": 0.8477, "step": 68670 }, { "epoch": 1.2057795958496462, "grad_norm": 0.06663963043686887, "learning_rate": 0.00017614561309758372, "loss": 0.851, "step": 68680 }, { "epoch": 1.2059551607296477, "grad_norm": 0.06720741866042469, "learning_rate": 0.00017613835771218203, "loss": 0.8478, "step": 68690 }, { "epoch": 1.206130725609649, "grad_norm": 0.05400735399818228, "learning_rate": 0.0001761311013747629, "loss": 0.8536, "step": 68700 }, { "epoch": 1.2063062904896504, "grad_norm": 0.0798636377120103, "learning_rate": 0.00017612384408541838, "loss": 0.8545, "step": 68710 }, { "epoch": 1.206481855369652, "grad_norm": 0.05484729287176015, "learning_rate": 0.00017611658584424034, "loss": 0.8531, "step": 68720 }, { "epoch": 1.2066574202496532, "grad_norm": 0.06922675059744755, "learning_rate": 0.00017610932665132074, "loss": 0.8532, "step": 68730 }, { "epoch": 1.2068329851296546, "grad_norm": 0.07365080119479793, "learning_rate": 0.00017610206650675158, "loss": 0.8447, "step": 68740 }, { "epoch": 1.2070085500096561, "grad_norm": 0.06717944857332152, "learning_rate": 0.0001760948054106249, "loss": 0.8522, "step": 68750 }, { "epoch": 1.2071841148896574, "grad_norm": 0.062201782440058154, "learning_rate": 0.00017608754336303256, "loss": 0.8475, "step": 68760 }, { "epoch": 1.2073596797696589, "grad_norm": 0.07834124628873017, "learning_rate": 0.0001760802803640667, "loss": 0.8547, "step": 68770 }, { "epoch": 1.2075352446496603, "grad_norm": 0.07671479470001409, "learning_rate": 0.00017607301641381928, "loss": 0.8561, "step": 68780 }, { "epoch": 1.2077108095296616, "grad_norm": 0.05986118210793201, "learning_rate": 0.00017606575151238237, "loss": 0.8415, "step": 68790 }, { "epoch": 1.207886374409663, "grad_norm": 0.07079499477664508, "learning_rate": 0.00017605848565984802, "loss": 0.848, "step": 68800 }, { "epoch": 1.2080619392896645, "grad_norm": 0.06423413948055286, "learning_rate": 0.00017605121885630822, "loss": 0.8522, "step": 68810 }, { "epoch": 1.208237504169666, "grad_norm": 0.0720516966260325, "learning_rate": 0.00017604395110185512, "loss": 0.8489, "step": 68820 }, { "epoch": 1.2084130690496673, "grad_norm": 0.06757284254182908, "learning_rate": 0.0001760366823965808, "loss": 0.8534, "step": 68830 }, { "epoch": 1.2085886339296688, "grad_norm": 0.046862398851652526, "learning_rate": 0.0001760294127405773, "loss": 0.8519, "step": 68840 }, { "epoch": 1.20876419880967, "grad_norm": 0.0718801849989231, "learning_rate": 0.00017602214213393677, "loss": 0.8476, "step": 68850 }, { "epoch": 1.2089397636896715, "grad_norm": 0.05236892597359392, "learning_rate": 0.00017601487057675134, "loss": 0.8485, "step": 68860 }, { "epoch": 1.209115328569673, "grad_norm": 0.06187647303692835, "learning_rate": 0.0001760075980691131, "loss": 0.8477, "step": 68870 }, { "epoch": 1.2092908934496744, "grad_norm": 0.059152757544697315, "learning_rate": 0.00017600032461111422, "loss": 0.8472, "step": 68880 }, { "epoch": 1.2094664583296757, "grad_norm": 0.04876773528117157, "learning_rate": 0.00017599305020284686, "loss": 0.8509, "step": 68890 }, { "epoch": 1.2096420232096772, "grad_norm": 0.055939515251194664, "learning_rate": 0.0001759857748444032, "loss": 0.8492, "step": 68900 }, { "epoch": 1.2098175880896784, "grad_norm": 0.06287391137675026, "learning_rate": 0.00017597849853587537, "loss": 0.8541, "step": 68910 }, { "epoch": 1.20999315296968, "grad_norm": 0.0771239458067529, "learning_rate": 0.00017597122127735562, "loss": 0.8438, "step": 68920 }, { "epoch": 1.2101687178496814, "grad_norm": 0.052783618244214986, "learning_rate": 0.0001759639430689361, "loss": 0.8473, "step": 68930 }, { "epoch": 1.2103442827296829, "grad_norm": 0.060946031843426084, "learning_rate": 0.00017595666391070908, "loss": 0.8465, "step": 68940 }, { "epoch": 1.2105198476096841, "grad_norm": 0.04753286715365804, "learning_rate": 0.00017594938380276675, "loss": 0.8478, "step": 68950 }, { "epoch": 1.2106954124896856, "grad_norm": 0.077065519500785, "learning_rate": 0.00017594210274520135, "loss": 0.8447, "step": 68960 }, { "epoch": 1.2108709773696869, "grad_norm": 0.0998974496994921, "learning_rate": 0.00017593482073810518, "loss": 0.8486, "step": 68970 }, { "epoch": 1.2110465422496883, "grad_norm": 0.0517369365782822, "learning_rate": 0.00017592753778157048, "loss": 0.8468, "step": 68980 }, { "epoch": 1.2112221071296898, "grad_norm": 0.06963274750687529, "learning_rate": 0.00017592025387568947, "loss": 0.8481, "step": 68990 }, { "epoch": 1.2113976720096913, "grad_norm": 0.05734830437294415, "learning_rate": 0.00017591296902055452, "loss": 0.8462, "step": 69000 }, { "epoch": 1.2115732368896925, "grad_norm": 0.07209924523274788, "learning_rate": 0.00017590568321625786, "loss": 0.8473, "step": 69010 }, { "epoch": 1.211748801769694, "grad_norm": 0.05715778388323867, "learning_rate": 0.00017589839646289186, "loss": 0.8461, "step": 69020 }, { "epoch": 1.2119243666496953, "grad_norm": 0.06657130156472842, "learning_rate": 0.0001758911087605488, "loss": 0.8537, "step": 69030 }, { "epoch": 1.2120999315296968, "grad_norm": 0.06537961072635133, "learning_rate": 0.0001758838201093211, "loss": 0.8456, "step": 69040 }, { "epoch": 1.2122754964096982, "grad_norm": 0.05477249875765379, "learning_rate": 0.000175876530509301, "loss": 0.8433, "step": 69050 }, { "epoch": 1.2124510612896997, "grad_norm": 0.07339596471454082, "learning_rate": 0.00017586923996058094, "loss": 0.8431, "step": 69060 }, { "epoch": 1.212626626169701, "grad_norm": 0.09291578307297488, "learning_rate": 0.0001758619484632532, "loss": 0.8498, "step": 69070 }, { "epoch": 1.2128021910497024, "grad_norm": 0.06807497414678475, "learning_rate": 0.00017585465601741026, "loss": 0.8514, "step": 69080 }, { "epoch": 1.212977755929704, "grad_norm": 0.06670845153107383, "learning_rate": 0.00017584736262314448, "loss": 0.8439, "step": 69090 }, { "epoch": 1.2131533208097052, "grad_norm": 0.06457680334282899, "learning_rate": 0.00017584006828054829, "loss": 0.8436, "step": 69100 }, { "epoch": 1.2133288856897066, "grad_norm": 0.05035505281002478, "learning_rate": 0.00017583277298971405, "loss": 0.8492, "step": 69110 }, { "epoch": 1.2135044505697081, "grad_norm": 0.0447472579279613, "learning_rate": 0.00017582547675073427, "loss": 0.8523, "step": 69120 }, { "epoch": 1.2136800154497094, "grad_norm": 0.1687829848096704, "learning_rate": 0.00017581817956370137, "loss": 0.849, "step": 69130 }, { "epoch": 1.2138555803297109, "grad_norm": 0.053600931685913074, "learning_rate": 0.00017581088142870778, "loss": 0.8474, "step": 69140 }, { "epoch": 1.2140311452097123, "grad_norm": 0.04916754977571986, "learning_rate": 0.000175803582345846, "loss": 0.8465, "step": 69150 }, { "epoch": 1.2142067100897136, "grad_norm": 0.061195991225114654, "learning_rate": 0.00017579628231520846, "loss": 0.8466, "step": 69160 }, { "epoch": 1.214382274969715, "grad_norm": 0.06193755223709708, "learning_rate": 0.0001757889813368877, "loss": 0.8545, "step": 69170 }, { "epoch": 1.2145578398497165, "grad_norm": 0.06638544815263384, "learning_rate": 0.00017578167941097623, "loss": 0.8494, "step": 69180 }, { "epoch": 1.2147334047297178, "grad_norm": 0.07429519521728069, "learning_rate": 0.00017577437653756655, "loss": 0.8562, "step": 69190 }, { "epoch": 1.2149089696097193, "grad_norm": 0.054440028036854424, "learning_rate": 0.00017576707271675114, "loss": 0.8418, "step": 69200 }, { "epoch": 1.2150845344897208, "grad_norm": 0.06826029525942919, "learning_rate": 0.00017575976794862263, "loss": 0.8516, "step": 69210 }, { "epoch": 1.215260099369722, "grad_norm": 0.05298518108961945, "learning_rate": 0.00017575246223327354, "loss": 0.8511, "step": 69220 }, { "epoch": 1.2154356642497235, "grad_norm": 0.05732340920822943, "learning_rate": 0.00017574515557079642, "loss": 0.8494, "step": 69230 }, { "epoch": 1.215611229129725, "grad_norm": 0.05176755370599254, "learning_rate": 0.00017573784796128385, "loss": 0.8537, "step": 69240 }, { "epoch": 1.2157867940097262, "grad_norm": 0.050565333995817925, "learning_rate": 0.00017573053940482845, "loss": 0.8552, "step": 69250 }, { "epoch": 1.2159623588897277, "grad_norm": 0.06253849639103926, "learning_rate": 0.00017572322990152276, "loss": 0.8467, "step": 69260 }, { "epoch": 1.2161379237697292, "grad_norm": 0.07611928713742057, "learning_rate": 0.00017571591945145942, "loss": 0.8574, "step": 69270 }, { "epoch": 1.2163134886497304, "grad_norm": 0.06111267593519427, "learning_rate": 0.0001757086080547311, "loss": 0.8506, "step": 69280 }, { "epoch": 1.216489053529732, "grad_norm": 0.056705651497911, "learning_rate": 0.00017570129571143036, "loss": 0.8519, "step": 69290 }, { "epoch": 1.2166646184097334, "grad_norm": 0.0697326440456881, "learning_rate": 0.0001756939824216499, "loss": 0.8399, "step": 69300 }, { "epoch": 1.2168401832897346, "grad_norm": 0.07259754321131326, "learning_rate": 0.00017568666818548235, "loss": 0.8575, "step": 69310 }, { "epoch": 1.2170157481697361, "grad_norm": 0.06044859516082529, "learning_rate": 0.00017567935300302046, "loss": 0.8515, "step": 69320 }, { "epoch": 1.2171913130497376, "grad_norm": 0.06466935770307206, "learning_rate": 0.0001756720368743568, "loss": 0.8556, "step": 69330 }, { "epoch": 1.2173668779297389, "grad_norm": 0.06720824574142885, "learning_rate": 0.00017566471979958413, "loss": 0.8546, "step": 69340 }, { "epoch": 1.2175424428097403, "grad_norm": 0.06204501997183284, "learning_rate": 0.00017565740177879517, "loss": 0.8502, "step": 69350 }, { "epoch": 1.2177180076897418, "grad_norm": 0.0663108875589936, "learning_rate": 0.0001756500828120826, "loss": 0.8475, "step": 69360 }, { "epoch": 1.217893572569743, "grad_norm": 0.06643947748694637, "learning_rate": 0.00017564276289953917, "loss": 0.8408, "step": 69370 }, { "epoch": 1.2180691374497445, "grad_norm": 0.09005544439371192, "learning_rate": 0.00017563544204125765, "loss": 0.8488, "step": 69380 }, { "epoch": 1.218244702329746, "grad_norm": 0.08460806843913546, "learning_rate": 0.00017562812023733077, "loss": 0.856, "step": 69390 }, { "epoch": 1.2184202672097473, "grad_norm": 0.05844904347034901, "learning_rate": 0.0001756207974878513, "loss": 0.8487, "step": 69400 }, { "epoch": 1.2185958320897488, "grad_norm": 0.07833982090934612, "learning_rate": 0.00017561347379291202, "loss": 0.8518, "step": 69410 }, { "epoch": 1.2187713969697502, "grad_norm": 0.0659666374915996, "learning_rate": 0.00017560614915260577, "loss": 0.853, "step": 69420 }, { "epoch": 1.2189469618497515, "grad_norm": 0.060769665505354176, "learning_rate": 0.00017559882356702527, "loss": 0.8513, "step": 69430 }, { "epoch": 1.219122526729753, "grad_norm": 0.06731081089827386, "learning_rate": 0.00017559149703626336, "loss": 0.8482, "step": 69440 }, { "epoch": 1.2192980916097544, "grad_norm": 0.05637729025646567, "learning_rate": 0.00017558416956041292, "loss": 0.8438, "step": 69450 }, { "epoch": 1.2194736564897557, "grad_norm": 0.07932381785353959, "learning_rate": 0.00017557684113956676, "loss": 0.8427, "step": 69460 }, { "epoch": 1.2196492213697572, "grad_norm": 0.0564685907245687, "learning_rate": 0.00017556951177381767, "loss": 0.8448, "step": 69470 }, { "epoch": 1.2198247862497587, "grad_norm": 0.08613592688437455, "learning_rate": 0.00017556218146325866, "loss": 0.8507, "step": 69480 }, { "epoch": 1.22000035112976, "grad_norm": 0.056753718626496384, "learning_rate": 0.00017555485020798251, "loss": 0.846, "step": 69490 }, { "epoch": 1.2201759160097614, "grad_norm": 0.0542743191347832, "learning_rate": 0.00017554751800808206, "loss": 0.852, "step": 69500 }, { "epoch": 1.2203514808897629, "grad_norm": 0.06101160838009717, "learning_rate": 0.00017554018486365034, "loss": 0.8438, "step": 69510 }, { "epoch": 1.2205270457697641, "grad_norm": 0.06982711578743546, "learning_rate": 0.00017553285077478013, "loss": 0.848, "step": 69520 }, { "epoch": 1.2207026106497656, "grad_norm": 0.07448514184516164, "learning_rate": 0.00017552551574156444, "loss": 0.8495, "step": 69530 }, { "epoch": 1.220878175529767, "grad_norm": 0.05802176041991153, "learning_rate": 0.00017551817976409618, "loss": 0.8561, "step": 69540 }, { "epoch": 1.2210537404097686, "grad_norm": 0.06969236988038642, "learning_rate": 0.00017551084284246832, "loss": 0.8518, "step": 69550 }, { "epoch": 1.2212293052897698, "grad_norm": 0.059985478164959964, "learning_rate": 0.00017550350497677378, "loss": 0.8422, "step": 69560 }, { "epoch": 1.2214048701697713, "grad_norm": 0.07222582335168742, "learning_rate": 0.00017549616616710558, "loss": 0.8545, "step": 69570 }, { "epoch": 1.2215804350497725, "grad_norm": 0.08377697449244088, "learning_rate": 0.00017548882641355663, "loss": 0.8449, "step": 69580 }, { "epoch": 1.221755999929774, "grad_norm": 0.050569961934486665, "learning_rate": 0.00017548148571622002, "loss": 0.8484, "step": 69590 }, { "epoch": 1.2219315648097755, "grad_norm": 0.07039380942405787, "learning_rate": 0.0001754741440751887, "loss": 0.8484, "step": 69600 }, { "epoch": 1.222107129689777, "grad_norm": 0.06662121390827146, "learning_rate": 0.0001754668014905557, "loss": 0.8535, "step": 69610 }, { "epoch": 1.2222826945697782, "grad_norm": 0.055404857439450596, "learning_rate": 0.00017545945796241405, "loss": 0.8522, "step": 69620 }, { "epoch": 1.2224582594497797, "grad_norm": 0.08054613793592483, "learning_rate": 0.0001754521134908568, "loss": 0.861, "step": 69630 }, { "epoch": 1.222633824329781, "grad_norm": 0.06569557798653189, "learning_rate": 0.00017544476807597698, "loss": 0.8545, "step": 69640 }, { "epoch": 1.2228093892097824, "grad_norm": 0.05668004703130368, "learning_rate": 0.00017543742171786773, "loss": 0.8506, "step": 69650 }, { "epoch": 1.222984954089784, "grad_norm": 0.04982552782777401, "learning_rate": 0.00017543007441662205, "loss": 0.8486, "step": 69660 }, { "epoch": 1.2231605189697854, "grad_norm": 0.07903628729005291, "learning_rate": 0.00017542272617233307, "loss": 0.8539, "step": 69670 }, { "epoch": 1.2233360838497866, "grad_norm": 0.06854061156159683, "learning_rate": 0.00017541537698509389, "loss": 0.8523, "step": 69680 }, { "epoch": 1.2235116487297881, "grad_norm": 0.06921236768269293, "learning_rate": 0.0001754080268549976, "loss": 0.8457, "step": 69690 }, { "epoch": 1.2236872136097894, "grad_norm": 0.07467007263462531, "learning_rate": 0.00017540067578213734, "loss": 0.8401, "step": 69700 }, { "epoch": 1.2238627784897909, "grad_norm": 0.06442352935390228, "learning_rate": 0.00017539332376660628, "loss": 0.8613, "step": 69710 }, { "epoch": 1.2240383433697923, "grad_norm": 0.05448942160776433, "learning_rate": 0.00017538597080849757, "loss": 0.8552, "step": 69720 }, { "epoch": 1.2242139082497938, "grad_norm": 0.07500490877314263, "learning_rate": 0.00017537861690790433, "loss": 0.8509, "step": 69730 }, { "epoch": 1.224389473129795, "grad_norm": 0.05658754517262798, "learning_rate": 0.00017537126206491977, "loss": 0.8424, "step": 69740 }, { "epoch": 1.2245650380097965, "grad_norm": 0.062475016919624676, "learning_rate": 0.00017536390627963708, "loss": 0.8457, "step": 69750 }, { "epoch": 1.2247406028897978, "grad_norm": 0.049412015644022936, "learning_rate": 0.0001753565495521494, "loss": 0.854, "step": 69760 }, { "epoch": 1.2249161677697993, "grad_norm": 0.0547648962993172, "learning_rate": 0.00017534919188255004, "loss": 0.8436, "step": 69770 }, { "epoch": 1.2250917326498008, "grad_norm": 0.20710294204698404, "learning_rate": 0.00017534183327093215, "loss": 0.8446, "step": 69780 }, { "epoch": 1.2252672975298022, "grad_norm": 0.06538102298317117, "learning_rate": 0.000175334473717389, "loss": 0.8506, "step": 69790 }, { "epoch": 1.2254428624098035, "grad_norm": 0.05908137729345147, "learning_rate": 0.00017532711322201377, "loss": 0.8393, "step": 69800 }, { "epoch": 1.225618427289805, "grad_norm": 0.07649036914146527, "learning_rate": 0.0001753197517848998, "loss": 0.8561, "step": 69810 }, { "epoch": 1.2257939921698064, "grad_norm": 0.06403974574395713, "learning_rate": 0.00017531238940614034, "loss": 0.8492, "step": 69820 }, { "epoch": 1.2259695570498077, "grad_norm": 0.04567697363957614, "learning_rate": 0.00017530502608582865, "loss": 0.8512, "step": 69830 }, { "epoch": 1.2261451219298092, "grad_norm": 0.06557015337303851, "learning_rate": 0.00017529766182405808, "loss": 0.8474, "step": 69840 }, { "epoch": 1.2263206868098107, "grad_norm": 0.05434518599917705, "learning_rate": 0.00017529029662092185, "loss": 0.8484, "step": 69850 }, { "epoch": 1.226496251689812, "grad_norm": 0.05130546265250548, "learning_rate": 0.00017528293047651335, "loss": 0.8531, "step": 69860 }, { "epoch": 1.2266718165698134, "grad_norm": 0.08313390214601013, "learning_rate": 0.00017527556339092586, "loss": 0.8476, "step": 69870 }, { "epoch": 1.2268473814498149, "grad_norm": 0.11818272508901662, "learning_rate": 0.00017526819536425273, "loss": 0.8527, "step": 69880 }, { "epoch": 1.2270229463298161, "grad_norm": 0.05937280520432867, "learning_rate": 0.00017526082639658736, "loss": 0.8529, "step": 69890 }, { "epoch": 1.2271985112098176, "grad_norm": 0.07258374091354232, "learning_rate": 0.0001752534564880231, "loss": 0.8453, "step": 69900 }, { "epoch": 1.227374076089819, "grad_norm": 0.07756297199555673, "learning_rate": 0.00017524608563865334, "loss": 0.8513, "step": 69910 }, { "epoch": 1.2275496409698203, "grad_norm": 0.05194453079252272, "learning_rate": 0.00017523871384857138, "loss": 0.8499, "step": 69920 }, { "epoch": 1.2277252058498218, "grad_norm": 0.08650559834841534, "learning_rate": 0.00017523134111787072, "loss": 0.8575, "step": 69930 }, { "epoch": 1.2279007707298233, "grad_norm": 0.05712853446211042, "learning_rate": 0.00017522396744664473, "loss": 0.8557, "step": 69940 }, { "epoch": 1.2280763356098245, "grad_norm": 0.08010143545451982, "learning_rate": 0.00017521659283498687, "loss": 0.842, "step": 69950 }, { "epoch": 1.228251900489826, "grad_norm": 0.06748136514028745, "learning_rate": 0.00017520921728299055, "loss": 0.8607, "step": 69960 }, { "epoch": 1.2284274653698275, "grad_norm": 0.06069663903030926, "learning_rate": 0.00017520184079074922, "loss": 0.8476, "step": 69970 }, { "epoch": 1.2286030302498288, "grad_norm": 0.06191603647885196, "learning_rate": 0.00017519446335835629, "loss": 0.8526, "step": 69980 }, { "epoch": 1.2287785951298302, "grad_norm": 0.04624608149921816, "learning_rate": 0.00017518708498590537, "loss": 0.8577, "step": 69990 }, { "epoch": 1.2289541600098317, "grad_norm": 0.053365300795504236, "learning_rate": 0.00017517970567348983, "loss": 0.8421, "step": 70000 }, { "epoch": 1.229129724889833, "grad_norm": 0.06375302158917526, "learning_rate": 0.00017517232542120322, "loss": 0.8464, "step": 70010 }, { "epoch": 1.2293052897698344, "grad_norm": 0.05350302558558072, "learning_rate": 0.00017516494422913903, "loss": 0.8554, "step": 70020 }, { "epoch": 1.229480854649836, "grad_norm": 0.09020340353536971, "learning_rate": 0.00017515756209739077, "loss": 0.8387, "step": 70030 }, { "epoch": 1.2296564195298372, "grad_norm": 0.05714395117296419, "learning_rate": 0.000175150179026052, "loss": 0.8455, "step": 70040 }, { "epoch": 1.2298319844098387, "grad_norm": 0.05666711208781458, "learning_rate": 0.00017514279501521624, "loss": 0.846, "step": 70050 }, { "epoch": 1.2300075492898401, "grad_norm": 0.04725293871285169, "learning_rate": 0.00017513541006497705, "loss": 0.8445, "step": 70060 }, { "epoch": 1.2301831141698414, "grad_norm": 0.057045320790698496, "learning_rate": 0.00017512802417542802, "loss": 0.8548, "step": 70070 }, { "epoch": 1.2303586790498429, "grad_norm": 0.0640004154525448, "learning_rate": 0.00017512063734666271, "loss": 0.8493, "step": 70080 }, { "epoch": 1.2305342439298443, "grad_norm": 0.05146079331483483, "learning_rate": 0.0001751132495787747, "loss": 0.8488, "step": 70090 }, { "epoch": 1.2307098088098456, "grad_norm": 0.057560968828308585, "learning_rate": 0.00017510586087185768, "loss": 0.845, "step": 70100 }, { "epoch": 1.230885373689847, "grad_norm": 0.06845261589932354, "learning_rate": 0.0001750984712260051, "loss": 0.8479, "step": 70110 }, { "epoch": 1.2310609385698486, "grad_norm": 0.056816968951460045, "learning_rate": 0.0001750910806413108, "loss": 0.8479, "step": 70120 }, { "epoch": 1.2312365034498498, "grad_norm": 0.054837835115689196, "learning_rate": 0.00017508368911786824, "loss": 0.8488, "step": 70130 }, { "epoch": 1.2314120683298513, "grad_norm": 0.056291010481640674, "learning_rate": 0.00017507629665577113, "loss": 0.8482, "step": 70140 }, { "epoch": 1.2315876332098528, "grad_norm": 0.05845934635395875, "learning_rate": 0.00017506890325511319, "loss": 0.8614, "step": 70150 }, { "epoch": 1.231763198089854, "grad_norm": 0.05408887999555541, "learning_rate": 0.00017506150891598803, "loss": 0.8498, "step": 70160 }, { "epoch": 1.2319387629698555, "grad_norm": 0.05021042109400439, "learning_rate": 0.0001750541136384894, "loss": 0.8497, "step": 70170 }, { "epoch": 1.232114327849857, "grad_norm": 0.04948423853027652, "learning_rate": 0.00017504671742271088, "loss": 0.8525, "step": 70180 }, { "epoch": 1.2322898927298582, "grad_norm": 0.07534161840780414, "learning_rate": 0.0001750393202687463, "loss": 0.854, "step": 70190 }, { "epoch": 1.2324654576098597, "grad_norm": 0.060948614981680804, "learning_rate": 0.00017503192217668937, "loss": 0.8484, "step": 70200 }, { "epoch": 1.2326410224898612, "grad_norm": 0.07597228474402204, "learning_rate": 0.00017502452314663373, "loss": 0.8422, "step": 70210 }, { "epoch": 1.2328165873698624, "grad_norm": 0.049638931513817354, "learning_rate": 0.00017501712317867323, "loss": 0.8431, "step": 70220 }, { "epoch": 1.232992152249864, "grad_norm": 0.07627831424753798, "learning_rate": 0.0001750097222729016, "loss": 0.8492, "step": 70230 }, { "epoch": 1.2331677171298654, "grad_norm": 0.057685309147513776, "learning_rate": 0.00017500232042941258, "loss": 0.843, "step": 70240 }, { "epoch": 1.2333432820098666, "grad_norm": 0.049723141874490596, "learning_rate": 0.00017499491764830002, "loss": 0.8439, "step": 70250 }, { "epoch": 1.2335188468898681, "grad_norm": 0.08772333335035615, "learning_rate": 0.00017498751392965763, "loss": 0.8447, "step": 70260 }, { "epoch": 1.2336944117698696, "grad_norm": 0.06887623269254212, "learning_rate": 0.00017498010927357932, "loss": 0.8548, "step": 70270 }, { "epoch": 1.233869976649871, "grad_norm": 0.06025074210650839, "learning_rate": 0.00017497270368015876, "loss": 0.8519, "step": 70280 }, { "epoch": 1.2340455415298723, "grad_norm": 0.05841854756976415, "learning_rate": 0.00017496529714948995, "loss": 0.8442, "step": 70290 }, { "epoch": 1.2342211064098738, "grad_norm": 0.07394819924179563, "learning_rate": 0.0001749578896816666, "loss": 0.8427, "step": 70300 }, { "epoch": 1.234396671289875, "grad_norm": 0.05066939957284587, "learning_rate": 0.00017495048127678264, "loss": 0.8478, "step": 70310 }, { "epoch": 1.2345722361698765, "grad_norm": 0.08073720996547826, "learning_rate": 0.00017494307193493188, "loss": 0.8503, "step": 70320 }, { "epoch": 1.234747801049878, "grad_norm": 0.07042337854482139, "learning_rate": 0.00017493566165620826, "loss": 0.859, "step": 70330 }, { "epoch": 1.2349233659298795, "grad_norm": 0.06837233541505913, "learning_rate": 0.00017492825044070564, "loss": 0.8505, "step": 70340 }, { "epoch": 1.2350989308098808, "grad_norm": 0.06698661653085462, "learning_rate": 0.00017492083828851788, "loss": 0.8567, "step": 70350 }, { "epoch": 1.2352744956898822, "grad_norm": 0.06614409532117663, "learning_rate": 0.00017491342519973895, "loss": 0.8515, "step": 70360 }, { "epoch": 1.2354500605698835, "grad_norm": 0.06471652033636478, "learning_rate": 0.00017490601117446276, "loss": 0.8477, "step": 70370 }, { "epoch": 1.235625625449885, "grad_norm": 0.09065022812924818, "learning_rate": 0.00017489859621278324, "loss": 0.8493, "step": 70380 }, { "epoch": 1.2358011903298864, "grad_norm": 0.06407011216079457, "learning_rate": 0.00017489118031479436, "loss": 0.8488, "step": 70390 }, { "epoch": 1.235976755209888, "grad_norm": 0.0771491240113959, "learning_rate": 0.00017488376348059005, "loss": 0.8506, "step": 70400 }, { "epoch": 1.2361523200898892, "grad_norm": 0.06372160846360474, "learning_rate": 0.0001748763457102643, "loss": 0.8549, "step": 70410 }, { "epoch": 1.2363278849698907, "grad_norm": 0.06745477697592123, "learning_rate": 0.0001748689270039111, "loss": 0.8537, "step": 70420 }, { "epoch": 1.236503449849892, "grad_norm": 0.061698903589906214, "learning_rate": 0.00017486150736162443, "loss": 0.8472, "step": 70430 }, { "epoch": 1.2366790147298934, "grad_norm": 0.0572284764718278, "learning_rate": 0.00017485408678349828, "loss": 0.8528, "step": 70440 }, { "epoch": 1.2368545796098949, "grad_norm": 0.05135653843910542, "learning_rate": 0.00017484666526962675, "loss": 0.8467, "step": 70450 }, { "epoch": 1.2370301444898963, "grad_norm": 0.0664857801551169, "learning_rate": 0.0001748392428201038, "loss": 0.857, "step": 70460 }, { "epoch": 1.2372057093698976, "grad_norm": 0.06218381747685543, "learning_rate": 0.0001748318194350235, "loss": 0.852, "step": 70470 }, { "epoch": 1.237381274249899, "grad_norm": 0.05046606942055646, "learning_rate": 0.00017482439511447984, "loss": 0.8487, "step": 70480 }, { "epoch": 1.2375568391299003, "grad_norm": 0.07261251853068028, "learning_rate": 0.000174816969858567, "loss": 0.8475, "step": 70490 }, { "epoch": 1.2377324040099018, "grad_norm": 0.05705039149602674, "learning_rate": 0.000174809543667379, "loss": 0.8483, "step": 70500 }, { "epoch": 1.2379079688899033, "grad_norm": 0.05744975802254922, "learning_rate": 0.0001748021165410099, "loss": 0.8524, "step": 70510 }, { "epoch": 1.2380835337699048, "grad_norm": 0.04942967934115302, "learning_rate": 0.00017479468847955387, "loss": 0.8438, "step": 70520 }, { "epoch": 1.238259098649906, "grad_norm": 0.0529200285852686, "learning_rate": 0.000174787259483105, "loss": 0.8443, "step": 70530 }, { "epoch": 1.2384346635299075, "grad_norm": 0.07003674977585658, "learning_rate": 0.0001747798295517574, "loss": 0.8584, "step": 70540 }, { "epoch": 1.238610228409909, "grad_norm": 0.0659436832989422, "learning_rate": 0.0001747723986856052, "loss": 0.8509, "step": 70550 }, { "epoch": 1.2387857932899102, "grad_norm": 0.06758444623126689, "learning_rate": 0.0001747649668847426, "loss": 0.8394, "step": 70560 }, { "epoch": 1.2389613581699117, "grad_norm": 0.05759868103867673, "learning_rate": 0.00017475753414926371, "loss": 0.8494, "step": 70570 }, { "epoch": 1.2391369230499132, "grad_norm": 0.06909016873809708, "learning_rate": 0.00017475010047926276, "loss": 0.8481, "step": 70580 }, { "epoch": 1.2393124879299144, "grad_norm": 0.062322926438455516, "learning_rate": 0.00017474266587483387, "loss": 0.8484, "step": 70590 }, { "epoch": 1.239488052809916, "grad_norm": 0.05745948533557679, "learning_rate": 0.0001747352303360713, "loss": 0.8507, "step": 70600 }, { "epoch": 1.2396636176899174, "grad_norm": 0.0738878811538337, "learning_rate": 0.0001747277938630692, "loss": 0.8534, "step": 70610 }, { "epoch": 1.2398391825699187, "grad_norm": 0.055996679486077755, "learning_rate": 0.00017472035645592183, "loss": 0.8524, "step": 70620 }, { "epoch": 1.2400147474499201, "grad_norm": 0.0632894600364455, "learning_rate": 0.00017471291811472342, "loss": 0.852, "step": 70630 }, { "epoch": 1.2401903123299216, "grad_norm": 0.06862150495351448, "learning_rate": 0.00017470547883956825, "loss": 0.8519, "step": 70640 }, { "epoch": 1.2403658772099229, "grad_norm": 0.047142137257782664, "learning_rate": 0.0001746980386305505, "loss": 0.8553, "step": 70650 }, { "epoch": 1.2405414420899243, "grad_norm": 0.06174881842513459, "learning_rate": 0.00017469059748776445, "loss": 0.853, "step": 70660 }, { "epoch": 1.2407170069699258, "grad_norm": 0.06577101742433711, "learning_rate": 0.00017468315541130445, "loss": 0.847, "step": 70670 }, { "epoch": 1.240892571849927, "grad_norm": 0.07198566263399268, "learning_rate": 0.00017467571240126472, "loss": 0.8499, "step": 70680 }, { "epoch": 1.2410681367299286, "grad_norm": 0.061125696759248116, "learning_rate": 0.0001746682684577396, "loss": 0.848, "step": 70690 }, { "epoch": 1.24124370160993, "grad_norm": 0.07351507050691881, "learning_rate": 0.00017466082358082342, "loss": 0.8482, "step": 70700 }, { "epoch": 1.2414192664899313, "grad_norm": 0.061026096355788685, "learning_rate": 0.00017465337777061048, "loss": 0.8482, "step": 70710 }, { "epoch": 1.2415948313699328, "grad_norm": 0.04879252720602575, "learning_rate": 0.00017464593102719514, "loss": 0.8514, "step": 70720 }, { "epoch": 1.2417703962499342, "grad_norm": 0.05941037682008558, "learning_rate": 0.0001746384833506717, "loss": 0.8482, "step": 70730 }, { "epoch": 1.2419459611299355, "grad_norm": 0.06936732893192446, "learning_rate": 0.00017463103474113457, "loss": 0.8514, "step": 70740 }, { "epoch": 1.242121526009937, "grad_norm": 0.05680575683579412, "learning_rate": 0.00017462358519867817, "loss": 0.8472, "step": 70750 }, { "epoch": 1.2422970908899384, "grad_norm": 0.05068548345993255, "learning_rate": 0.00017461613472339677, "loss": 0.8483, "step": 70760 }, { "epoch": 1.2424726557699397, "grad_norm": 0.05453043289697494, "learning_rate": 0.00017460868331538489, "loss": 0.8456, "step": 70770 }, { "epoch": 1.2426482206499412, "grad_norm": 0.0863670922665748, "learning_rate": 0.00017460123097473685, "loss": 0.8442, "step": 70780 }, { "epoch": 1.2428237855299427, "grad_norm": 0.05510072298296477, "learning_rate": 0.00017459377770154714, "loss": 0.8557, "step": 70790 }, { "epoch": 1.242999350409944, "grad_norm": 0.05307542057323678, "learning_rate": 0.0001745863234959101, "loss": 0.8485, "step": 70800 }, { "epoch": 1.2431749152899454, "grad_norm": 0.08164823524092528, "learning_rate": 0.00017457886835792029, "loss": 0.8493, "step": 70810 }, { "epoch": 1.2433504801699469, "grad_norm": 0.0660909094560127, "learning_rate": 0.00017457141228767212, "loss": 0.8473, "step": 70820 }, { "epoch": 1.2435260450499481, "grad_norm": 0.054728226637645476, "learning_rate": 0.00017456395528526002, "loss": 0.848, "step": 70830 }, { "epoch": 1.2437016099299496, "grad_norm": 0.05591165377019224, "learning_rate": 0.00017455649735077854, "loss": 0.8453, "step": 70840 }, { "epoch": 1.243877174809951, "grad_norm": 0.047877847652184814, "learning_rate": 0.00017454903848432215, "loss": 0.8461, "step": 70850 }, { "epoch": 1.2440527396899523, "grad_norm": 0.05323197383776144, "learning_rate": 0.00017454157868598532, "loss": 0.8486, "step": 70860 }, { "epoch": 1.2442283045699538, "grad_norm": 0.05526286192030539, "learning_rate": 0.0001745341179558626, "loss": 0.855, "step": 70870 }, { "epoch": 1.2444038694499553, "grad_norm": 0.06282451069475004, "learning_rate": 0.0001745266562940485, "loss": 0.8532, "step": 70880 }, { "epoch": 1.2445794343299565, "grad_norm": 0.05436100878952081, "learning_rate": 0.0001745191937006376, "loss": 0.8503, "step": 70890 }, { "epoch": 1.244754999209958, "grad_norm": 0.05525344173821724, "learning_rate": 0.0001745117301757244, "loss": 0.8547, "step": 70900 }, { "epoch": 1.2449305640899595, "grad_norm": 0.06844423838826798, "learning_rate": 0.0001745042657194035, "loss": 0.8491, "step": 70910 }, { "epoch": 1.2451061289699608, "grad_norm": 0.05967991118706802, "learning_rate": 0.00017449680033176948, "loss": 0.8525, "step": 70920 }, { "epoch": 1.2452816938499622, "grad_norm": 0.05788535414473954, "learning_rate": 0.0001744893340129169, "loss": 0.8565, "step": 70930 }, { "epoch": 1.2454572587299637, "grad_norm": 0.09561551243364096, "learning_rate": 0.00017448186676294038, "loss": 0.8458, "step": 70940 }, { "epoch": 1.245632823609965, "grad_norm": 0.062307128708685156, "learning_rate": 0.00017447439858193453, "loss": 0.8526, "step": 70950 }, { "epoch": 1.2458083884899664, "grad_norm": 0.1266215381305119, "learning_rate": 0.00017446692946999397, "loss": 0.8426, "step": 70960 }, { "epoch": 1.245983953369968, "grad_norm": 0.04754469088505023, "learning_rate": 0.0001744594594272133, "loss": 0.8473, "step": 70970 }, { "epoch": 1.2461595182499692, "grad_norm": 0.05318588050604438, "learning_rate": 0.00017445198845368726, "loss": 0.8507, "step": 70980 }, { "epoch": 1.2463350831299707, "grad_norm": 0.047214477480335686, "learning_rate": 0.00017444451654951039, "loss": 0.8556, "step": 70990 }, { "epoch": 1.2465106480099721, "grad_norm": 0.06652059786883814, "learning_rate": 0.00017443704371477748, "loss": 0.8499, "step": 71000 }, { "epoch": 1.2466862128899736, "grad_norm": 0.06874009624408782, "learning_rate": 0.00017442956994958311, "loss": 0.8439, "step": 71010 }, { "epoch": 1.2468617777699749, "grad_norm": 0.06496633284639548, "learning_rate": 0.00017442209525402201, "loss": 0.8604, "step": 71020 }, { "epoch": 1.2470373426499763, "grad_norm": 0.057381389808009074, "learning_rate": 0.00017441461962818891, "loss": 0.8476, "step": 71030 }, { "epoch": 1.2472129075299776, "grad_norm": 0.060711137281949976, "learning_rate": 0.00017440714307217852, "loss": 0.8564, "step": 71040 }, { "epoch": 1.247388472409979, "grad_norm": 0.07107096087488948, "learning_rate": 0.00017439966558608554, "loss": 0.8441, "step": 71050 }, { "epoch": 1.2475640372899806, "grad_norm": 0.06098899304150364, "learning_rate": 0.00017439218717000472, "loss": 0.8451, "step": 71060 }, { "epoch": 1.247739602169982, "grad_norm": 0.07390376797116668, "learning_rate": 0.00017438470782403084, "loss": 0.8491, "step": 71070 }, { "epoch": 1.2479151670499833, "grad_norm": 0.06814123298171698, "learning_rate": 0.00017437722754825864, "loss": 0.8445, "step": 71080 }, { "epoch": 1.2480907319299848, "grad_norm": 0.059562274164934516, "learning_rate": 0.00017436974634278292, "loss": 0.8493, "step": 71090 }, { "epoch": 1.248266296809986, "grad_norm": 0.049702607298005186, "learning_rate": 0.00017436226420769843, "loss": 0.8545, "step": 71100 }, { "epoch": 1.2484418616899875, "grad_norm": 0.05255299711406666, "learning_rate": 0.0001743547811431, "loss": 0.8416, "step": 71110 }, { "epoch": 1.248617426569989, "grad_norm": 0.053848288834945614, "learning_rate": 0.00017434729714908246, "loss": 0.8458, "step": 71120 }, { "epoch": 1.2487929914499905, "grad_norm": 0.06590928651277758, "learning_rate": 0.00017433981222574055, "loss": 0.8501, "step": 71130 }, { "epoch": 1.2489685563299917, "grad_norm": 0.05043799682951517, "learning_rate": 0.00017433232637316924, "loss": 0.8393, "step": 71140 }, { "epoch": 1.2491441212099932, "grad_norm": 0.07898123945906525, "learning_rate": 0.00017432483959146325, "loss": 0.8528, "step": 71150 }, { "epoch": 1.2493196860899944, "grad_norm": 0.061491943679739314, "learning_rate": 0.00017431735188071752, "loss": 0.8512, "step": 71160 }, { "epoch": 1.249495250969996, "grad_norm": 0.051860288119397295, "learning_rate": 0.00017430986324102688, "loss": 0.8544, "step": 71170 }, { "epoch": 1.2496708158499974, "grad_norm": 0.053404323196867345, "learning_rate": 0.0001743023736724862, "loss": 0.8493, "step": 71180 }, { "epoch": 1.2498463807299989, "grad_norm": 0.07860801462238394, "learning_rate": 0.0001742948831751904, "loss": 0.8401, "step": 71190 }, { "epoch": 1.2500219456100001, "grad_norm": 0.06833293188677632, "learning_rate": 0.00017428739174923442, "loss": 0.8563, "step": 71200 }, { "epoch": 1.2501975104900016, "grad_norm": 0.053137995488233856, "learning_rate": 0.0001742798993947131, "loss": 0.8509, "step": 71210 }, { "epoch": 1.2503730753700029, "grad_norm": 0.070047305010806, "learning_rate": 0.00017427240611172143, "loss": 0.8512, "step": 71220 }, { "epoch": 1.2505486402500043, "grad_norm": 0.06353102330969142, "learning_rate": 0.0001742649119003543, "loss": 0.8533, "step": 71230 }, { "epoch": 1.2507242051300058, "grad_norm": 0.07352829558695258, "learning_rate": 0.00017425741676070674, "loss": 0.8489, "step": 71240 }, { "epoch": 1.2508997700100073, "grad_norm": 0.06248178545920373, "learning_rate": 0.00017424992069287366, "loss": 0.8482, "step": 71250 }, { "epoch": 1.2510753348900086, "grad_norm": 0.07637779754864395, "learning_rate": 0.00017424242369695005, "loss": 0.8565, "step": 71260 }, { "epoch": 1.25125089977001, "grad_norm": 0.07937740898637931, "learning_rate": 0.00017423492577303088, "loss": 0.8469, "step": 71270 }, { "epoch": 1.2514264646500113, "grad_norm": 0.07784489406771485, "learning_rate": 0.00017422742692121116, "loss": 0.8467, "step": 71280 }, { "epoch": 1.2516020295300128, "grad_norm": 0.08986918358172007, "learning_rate": 0.00017421992714158592, "loss": 0.8418, "step": 71290 }, { "epoch": 1.2517775944100142, "grad_norm": 0.08553028143359326, "learning_rate": 0.00017421242643425017, "loss": 0.8516, "step": 71300 }, { "epoch": 1.2519531592900157, "grad_norm": 0.055998971431877764, "learning_rate": 0.00017420492479929895, "loss": 0.8491, "step": 71310 }, { "epoch": 1.252128724170017, "grad_norm": 0.051181080075843675, "learning_rate": 0.0001741974222368273, "loss": 0.8509, "step": 71320 }, { "epoch": 1.2523042890500184, "grad_norm": 0.06671159670498754, "learning_rate": 0.00017418991874693028, "loss": 0.8529, "step": 71330 }, { "epoch": 1.2524798539300197, "grad_norm": 0.0906317826475553, "learning_rate": 0.00017418241432970295, "loss": 0.8478, "step": 71340 }, { "epoch": 1.2526554188100212, "grad_norm": 0.0493578496941245, "learning_rate": 0.0001741749089852404, "loss": 0.8441, "step": 71350 }, { "epoch": 1.2528309836900227, "grad_norm": 0.07040135865410757, "learning_rate": 0.00017416740271363777, "loss": 0.852, "step": 71360 }, { "epoch": 1.2530065485700241, "grad_norm": 0.0587252911181794, "learning_rate": 0.0001741598955149901, "loss": 0.8509, "step": 71370 }, { "epoch": 1.2531821134500254, "grad_norm": 0.08036325179727981, "learning_rate": 0.00017415238738939255, "loss": 0.8481, "step": 71380 }, { "epoch": 1.2533576783300269, "grad_norm": 0.04560209005454114, "learning_rate": 0.00017414487833694023, "loss": 0.8492, "step": 71390 }, { "epoch": 1.2535332432100283, "grad_norm": 0.05682358485516191, "learning_rate": 0.00017413736835772823, "loss": 0.8485, "step": 71400 }, { "epoch": 1.2537088080900296, "grad_norm": 0.11139356243448319, "learning_rate": 0.00017412985745185182, "loss": 0.8523, "step": 71410 }, { "epoch": 1.253884372970031, "grad_norm": 0.08922348385964067, "learning_rate": 0.00017412234561940608, "loss": 0.8448, "step": 71420 }, { "epoch": 1.2540599378500326, "grad_norm": 0.06361566463303313, "learning_rate": 0.00017411483286048618, "loss": 0.8543, "step": 71430 }, { "epoch": 1.2542355027300338, "grad_norm": 0.06943448399206519, "learning_rate": 0.00017410731917518737, "loss": 0.8493, "step": 71440 }, { "epoch": 1.2544110676100353, "grad_norm": 0.0658537153105197, "learning_rate": 0.00017409980456360483, "loss": 0.8514, "step": 71450 }, { "epoch": 1.2545866324900368, "grad_norm": 0.05314740074453562, "learning_rate": 0.0001740922890258337, "loss": 0.8399, "step": 71460 }, { "epoch": 1.254762197370038, "grad_norm": 0.08451334726423906, "learning_rate": 0.0001740847725619693, "loss": 0.8476, "step": 71470 }, { "epoch": 1.2549377622500395, "grad_norm": 0.07429359906106023, "learning_rate": 0.00017407725517210683, "loss": 0.8506, "step": 71480 }, { "epoch": 1.255113327130041, "grad_norm": 0.053321225304388195, "learning_rate": 0.00017406973685634152, "loss": 0.8419, "step": 71490 }, { "epoch": 1.2552888920100422, "grad_norm": 0.05090160019938407, "learning_rate": 0.00017406221761476863, "loss": 0.839, "step": 71500 }, { "epoch": 1.2554644568900437, "grad_norm": 0.09109227816624284, "learning_rate": 0.00017405469744748343, "loss": 0.8428, "step": 71510 }, { "epoch": 1.2556400217700452, "grad_norm": 0.06279216973962057, "learning_rate": 0.00017404717635458123, "loss": 0.8466, "step": 71520 }, { "epoch": 1.2558155866500464, "grad_norm": 0.07770183779370252, "learning_rate": 0.00017403965433615732, "loss": 0.8579, "step": 71530 }, { "epoch": 1.255991151530048, "grad_norm": 0.04916857738109744, "learning_rate": 0.000174032131392307, "loss": 0.8524, "step": 71540 }, { "epoch": 1.2561667164100494, "grad_norm": 0.05409344758576986, "learning_rate": 0.00017402460752312553, "loss": 0.8446, "step": 71550 }, { "epoch": 1.2563422812900507, "grad_norm": 0.0726233153289712, "learning_rate": 0.00017401708272870832, "loss": 0.851, "step": 71560 }, { "epoch": 1.2565178461700521, "grad_norm": 0.057366658486299925, "learning_rate": 0.0001740095570091507, "loss": 0.8524, "step": 71570 }, { "epoch": 1.2566934110500536, "grad_norm": 0.0632896811212927, "learning_rate": 0.00017400203036454793, "loss": 0.8499, "step": 71580 }, { "epoch": 1.2568689759300549, "grad_norm": 0.06916350002442036, "learning_rate": 0.0001739945027949955, "loss": 0.8533, "step": 71590 }, { "epoch": 1.2570445408100563, "grad_norm": 0.06488734058309144, "learning_rate": 0.00017398697430058872, "loss": 0.8535, "step": 71600 }, { "epoch": 1.2572201056900578, "grad_norm": 0.062070905997827025, "learning_rate": 0.00017397944488142298, "loss": 0.8532, "step": 71610 }, { "epoch": 1.2573956705700593, "grad_norm": 0.05837587774647428, "learning_rate": 0.0001739719145375937, "loss": 0.8473, "step": 71620 }, { "epoch": 1.2575712354500606, "grad_norm": 0.06953716318400378, "learning_rate": 0.00017396438326919625, "loss": 0.8412, "step": 71630 }, { "epoch": 1.257746800330062, "grad_norm": 0.0527386776939109, "learning_rate": 0.00017395685107632613, "loss": 0.8583, "step": 71640 }, { "epoch": 1.2579223652100633, "grad_norm": 0.08197377651957635, "learning_rate": 0.0001739493179590787, "loss": 0.8509, "step": 71650 }, { "epoch": 1.2580979300900648, "grad_norm": 0.05812068512939487, "learning_rate": 0.00017394178391754944, "loss": 0.8508, "step": 71660 }, { "epoch": 1.2582734949700662, "grad_norm": 0.05803741818164181, "learning_rate": 0.00017393424895183378, "loss": 0.8572, "step": 71670 }, { "epoch": 1.2584490598500677, "grad_norm": 0.0610304702355124, "learning_rate": 0.0001739267130620272, "loss": 0.8531, "step": 71680 }, { "epoch": 1.258624624730069, "grad_norm": 0.08607748874491718, "learning_rate": 0.00017391917624822522, "loss": 0.8521, "step": 71690 }, { "epoch": 1.2588001896100705, "grad_norm": 0.05136774917703949, "learning_rate": 0.00017391163851052324, "loss": 0.8516, "step": 71700 }, { "epoch": 1.2589757544900717, "grad_norm": 0.05310295725214159, "learning_rate": 0.0001739040998490169, "loss": 0.8582, "step": 71710 }, { "epoch": 1.2591513193700732, "grad_norm": 0.05909427703800031, "learning_rate": 0.00017389656026380162, "loss": 0.8442, "step": 71720 }, { "epoch": 1.2593268842500747, "grad_norm": 0.06920109910118952, "learning_rate": 0.00017388901975497293, "loss": 0.8495, "step": 71730 }, { "epoch": 1.2595024491300761, "grad_norm": 0.08031423994713356, "learning_rate": 0.0001738814783226264, "loss": 0.8515, "step": 71740 }, { "epoch": 1.2596780140100774, "grad_norm": 0.0780928151247025, "learning_rate": 0.00017387393596685763, "loss": 0.8615, "step": 71750 }, { "epoch": 1.2598535788900789, "grad_norm": 0.10481713102630366, "learning_rate": 0.00017386639268776203, "loss": 0.8472, "step": 71760 }, { "epoch": 1.2600291437700801, "grad_norm": 0.05770982968373696, "learning_rate": 0.00017385884848543533, "loss": 0.8476, "step": 71770 }, { "epoch": 1.2602047086500816, "grad_norm": 0.07262518321790301, "learning_rate": 0.00017385130335997303, "loss": 0.8484, "step": 71780 }, { "epoch": 1.260380273530083, "grad_norm": 0.05512697141255702, "learning_rate": 0.00017384375731147077, "loss": 0.854, "step": 71790 }, { "epoch": 1.2605558384100846, "grad_norm": 0.05680316446851685, "learning_rate": 0.00017383621034002412, "loss": 0.8455, "step": 71800 }, { "epoch": 1.2607314032900858, "grad_norm": 0.13879500673206, "learning_rate": 0.00017382866244572875, "loss": 0.8396, "step": 71810 }, { "epoch": 1.2609069681700873, "grad_norm": 0.056798179717928804, "learning_rate": 0.00017382111362868025, "loss": 0.8507, "step": 71820 }, { "epoch": 1.2610825330500885, "grad_norm": 0.08153001435661585, "learning_rate": 0.0001738135638889743, "loss": 0.8489, "step": 71830 }, { "epoch": 1.26125809793009, "grad_norm": 0.06099042661420578, "learning_rate": 0.00017380601322670655, "loss": 0.8541, "step": 71840 }, { "epoch": 1.2614336628100915, "grad_norm": 0.05206430758727015, "learning_rate": 0.00017379846164197263, "loss": 0.8541, "step": 71850 }, { "epoch": 1.261609227690093, "grad_norm": 0.050678718383597866, "learning_rate": 0.00017379090913486827, "loss": 0.8462, "step": 71860 }, { "epoch": 1.2617847925700942, "grad_norm": 0.05956329717122795, "learning_rate": 0.00017378335570548915, "loss": 0.8479, "step": 71870 }, { "epoch": 1.2619603574500957, "grad_norm": 0.06724492144055447, "learning_rate": 0.00017377580135393095, "loss": 0.8499, "step": 71880 }, { "epoch": 1.262135922330097, "grad_norm": 0.06495932140413674, "learning_rate": 0.00017376824608028943, "loss": 0.8495, "step": 71890 }, { "epoch": 1.2623114872100984, "grad_norm": 0.05800867894715242, "learning_rate": 0.00017376068988466024, "loss": 0.8459, "step": 71900 }, { "epoch": 1.2624870520901, "grad_norm": 0.08027348081651836, "learning_rate": 0.00017375313276713918, "loss": 0.8547, "step": 71910 }, { "epoch": 1.2626626169701014, "grad_norm": 0.057944606714618686, "learning_rate": 0.000173745574727822, "loss": 0.8401, "step": 71920 }, { "epoch": 1.2628381818501027, "grad_norm": 0.07110636409592908, "learning_rate": 0.00017373801576680442, "loss": 0.8442, "step": 71930 }, { "epoch": 1.2630137467301041, "grad_norm": 0.0557716149161617, "learning_rate": 0.00017373045588418227, "loss": 0.8531, "step": 71940 }, { "epoch": 1.2631893116101054, "grad_norm": 0.06008205288788792, "learning_rate": 0.0001737228950800513, "loss": 0.8437, "step": 71950 }, { "epoch": 1.2633648764901069, "grad_norm": 0.07183681043828141, "learning_rate": 0.00017371533335450726, "loss": 0.8505, "step": 71960 }, { "epoch": 1.2635404413701083, "grad_norm": 0.05257656848204088, "learning_rate": 0.00017370777070764608, "loss": 0.8572, "step": 71970 }, { "epoch": 1.2637160062501098, "grad_norm": 0.04864732401223535, "learning_rate": 0.00017370020713956347, "loss": 0.8533, "step": 71980 }, { "epoch": 1.263891571130111, "grad_norm": 0.05519761310507869, "learning_rate": 0.0001736926426503553, "loss": 0.8559, "step": 71990 }, { "epoch": 1.2640671360101126, "grad_norm": 0.0579481539704185, "learning_rate": 0.00017368507724011743, "loss": 0.8517, "step": 72000 }, { "epoch": 1.2642427008901138, "grad_norm": 0.06939207835880319, "learning_rate": 0.0001736775109089457, "loss": 0.8465, "step": 72010 }, { "epoch": 1.2644182657701153, "grad_norm": 0.0553914543329916, "learning_rate": 0.00017366994365693598, "loss": 0.8485, "step": 72020 }, { "epoch": 1.2645938306501168, "grad_norm": 0.07266915344246179, "learning_rate": 0.00017366237548418416, "loss": 0.8493, "step": 72030 }, { "epoch": 1.2647693955301182, "grad_norm": 0.0637203162671689, "learning_rate": 0.0001736548063907861, "loss": 0.8486, "step": 72040 }, { "epoch": 1.2649449604101195, "grad_norm": 0.06736655417740198, "learning_rate": 0.00017364723637683773, "loss": 0.8448, "step": 72050 }, { "epoch": 1.265120525290121, "grad_norm": 0.06406376781908135, "learning_rate": 0.00017363966544243494, "loss": 0.8466, "step": 72060 }, { "epoch": 1.2652960901701222, "grad_norm": 0.06831457300789981, "learning_rate": 0.0001736320935876737, "loss": 0.853, "step": 72070 }, { "epoch": 1.2654716550501237, "grad_norm": 0.1024071017555008, "learning_rate": 0.00017362452081264986, "loss": 0.8516, "step": 72080 }, { "epoch": 1.2656472199301252, "grad_norm": 0.05422877947563077, "learning_rate": 0.00017361694711745944, "loss": 0.8532, "step": 72090 }, { "epoch": 1.2658227848101267, "grad_norm": 0.047274158847977185, "learning_rate": 0.0001736093725021984, "loss": 0.8397, "step": 72100 }, { "epoch": 1.265998349690128, "grad_norm": 0.05971160516134048, "learning_rate": 0.0001736017969669627, "loss": 0.8516, "step": 72110 }, { "epoch": 1.2661739145701294, "grad_norm": 0.0576977159343568, "learning_rate": 0.00017359422051184832, "loss": 0.8541, "step": 72120 }, { "epoch": 1.2663494794501309, "grad_norm": 0.06110890120661097, "learning_rate": 0.00017358664313695123, "loss": 0.8505, "step": 72130 }, { "epoch": 1.2665250443301321, "grad_norm": 0.06651129925774067, "learning_rate": 0.00017357906484236746, "loss": 0.8618, "step": 72140 }, { "epoch": 1.2667006092101336, "grad_norm": 0.1012973574493652, "learning_rate": 0.00017357148562819306, "loss": 0.8614, "step": 72150 }, { "epoch": 1.266876174090135, "grad_norm": 0.054266394096495855, "learning_rate": 0.00017356390549452398, "loss": 0.8544, "step": 72160 }, { "epoch": 1.2670517389701363, "grad_norm": 0.0718110344774396, "learning_rate": 0.00017355632444145634, "loss": 0.8517, "step": 72170 }, { "epoch": 1.2672273038501378, "grad_norm": 0.048974919094991305, "learning_rate": 0.00017354874246908617, "loss": 0.8526, "step": 72180 }, { "epoch": 1.2674028687301393, "grad_norm": 0.04972756332151006, "learning_rate": 0.00017354115957750953, "loss": 0.8538, "step": 72190 }, { "epoch": 1.2675784336101406, "grad_norm": 0.05420287367649407, "learning_rate": 0.00017353357576682248, "loss": 0.8441, "step": 72200 }, { "epoch": 1.267753998490142, "grad_norm": 0.06134042765217292, "learning_rate": 0.00017352599103712113, "loss": 0.8608, "step": 72210 }, { "epoch": 1.2679295633701435, "grad_norm": 0.06547837576705963, "learning_rate": 0.00017351840538850158, "loss": 0.8449, "step": 72220 }, { "epoch": 1.2681051282501448, "grad_norm": 0.08289446739796197, "learning_rate": 0.0001735108188210599, "loss": 0.8487, "step": 72230 }, { "epoch": 1.2682806931301462, "grad_norm": 0.06848268757865146, "learning_rate": 0.00017350323133489233, "loss": 0.8549, "step": 72240 }, { "epoch": 1.2684562580101477, "grad_norm": 0.05988075332982882, "learning_rate": 0.00017349564293009487, "loss": 0.8522, "step": 72250 }, { "epoch": 1.268631822890149, "grad_norm": 0.05230366962040503, "learning_rate": 0.00017348805360676368, "loss": 0.8466, "step": 72260 }, { "epoch": 1.2688073877701505, "grad_norm": 0.05397959649639676, "learning_rate": 0.00017348046336499503, "loss": 0.8452, "step": 72270 }, { "epoch": 1.268982952650152, "grad_norm": 0.06766406516841464, "learning_rate": 0.00017347287220488499, "loss": 0.8501, "step": 72280 }, { "epoch": 1.2691585175301532, "grad_norm": 0.061307581258155656, "learning_rate": 0.00017346528012652975, "loss": 0.8496, "step": 72290 }, { "epoch": 1.2693340824101547, "grad_norm": 0.06586352797348763, "learning_rate": 0.00017345768713002556, "loss": 0.8476, "step": 72300 }, { "epoch": 1.2695096472901561, "grad_norm": 0.06585264956194647, "learning_rate": 0.00017345009321546856, "loss": 0.8406, "step": 72310 }, { "epoch": 1.2696852121701574, "grad_norm": 0.05605543023658029, "learning_rate": 0.00017344249838295502, "loss": 0.857, "step": 72320 }, { "epoch": 1.2698607770501589, "grad_norm": 0.04833524288304587, "learning_rate": 0.0001734349026325811, "loss": 0.8467, "step": 72330 }, { "epoch": 1.2700363419301604, "grad_norm": 0.08902168362225377, "learning_rate": 0.00017342730596444313, "loss": 0.8561, "step": 72340 }, { "epoch": 1.2702119068101618, "grad_norm": 0.06151489913496178, "learning_rate": 0.0001734197083786373, "loss": 0.8496, "step": 72350 }, { "epoch": 1.270387471690163, "grad_norm": 0.06622829686583584, "learning_rate": 0.00017341210987525986, "loss": 0.8468, "step": 72360 }, { "epoch": 1.2705630365701646, "grad_norm": 0.052106701269316213, "learning_rate": 0.00017340451045440712, "loss": 0.8518, "step": 72370 }, { "epoch": 1.2707386014501658, "grad_norm": 0.06304563616026228, "learning_rate": 0.00017339691011617538, "loss": 0.8534, "step": 72380 }, { "epoch": 1.2709141663301673, "grad_norm": 0.05243629016220857, "learning_rate": 0.0001733893088606609, "loss": 0.8546, "step": 72390 }, { "epoch": 1.2710897312101688, "grad_norm": 0.0657653151245965, "learning_rate": 0.00017338170668796, "loss": 0.8526, "step": 72400 }, { "epoch": 1.2712652960901702, "grad_norm": 0.062392395702821984, "learning_rate": 0.000173374103598169, "loss": 0.8552, "step": 72410 }, { "epoch": 1.2714408609701715, "grad_norm": 0.06393423153858042, "learning_rate": 0.00017336649959138423, "loss": 0.8512, "step": 72420 }, { "epoch": 1.271616425850173, "grad_norm": 0.05689527049104964, "learning_rate": 0.00017335889466770207, "loss": 0.8505, "step": 72430 }, { "epoch": 1.2717919907301742, "grad_norm": 0.0646077600188929, "learning_rate": 0.00017335128882721882, "loss": 0.8542, "step": 72440 }, { "epoch": 1.2719675556101757, "grad_norm": 0.07986456395887101, "learning_rate": 0.0001733436820700309, "loss": 0.8555, "step": 72450 }, { "epoch": 1.2721431204901772, "grad_norm": 0.08026660209438408, "learning_rate": 0.00017333607439623465, "loss": 0.852, "step": 72460 }, { "epoch": 1.2723186853701787, "grad_norm": 0.05046487354773926, "learning_rate": 0.00017332846580592648, "loss": 0.8533, "step": 72470 }, { "epoch": 1.27249425025018, "grad_norm": 0.07364286914334957, "learning_rate": 0.00017332085629920277, "loss": 0.8497, "step": 72480 }, { "epoch": 1.2726698151301814, "grad_norm": 0.06971278341374511, "learning_rate": 0.00017331324587615995, "loss": 0.8424, "step": 72490 }, { "epoch": 1.2728453800101827, "grad_norm": 0.06711466986083427, "learning_rate": 0.00017330563453689445, "loss": 0.8435, "step": 72500 }, { "epoch": 1.2730209448901841, "grad_norm": 0.0641308819366934, "learning_rate": 0.0001732980222815027, "loss": 0.8539, "step": 72510 }, { "epoch": 1.2731965097701856, "grad_norm": 0.045907248472457224, "learning_rate": 0.00017329040911008117, "loss": 0.8453, "step": 72520 }, { "epoch": 1.273372074650187, "grad_norm": 0.05283710967053061, "learning_rate": 0.00017328279502272628, "loss": 0.8482, "step": 72530 }, { "epoch": 1.2735476395301883, "grad_norm": 0.05916214260815508, "learning_rate": 0.00017327518001953457, "loss": 0.8518, "step": 72540 }, { "epoch": 1.2737232044101898, "grad_norm": 0.06615755680430736, "learning_rate": 0.00017326756410060242, "loss": 0.8415, "step": 72550 }, { "epoch": 1.273898769290191, "grad_norm": 0.07144374967754195, "learning_rate": 0.0001732599472660264, "loss": 0.8471, "step": 72560 }, { "epoch": 1.2740743341701926, "grad_norm": 0.046337313769100455, "learning_rate": 0.00017325232951590302, "loss": 0.8459, "step": 72570 }, { "epoch": 1.274249899050194, "grad_norm": 0.055423655857742615, "learning_rate": 0.00017324471085032876, "loss": 0.8594, "step": 72580 }, { "epoch": 1.2744254639301955, "grad_norm": 0.05130429995609372, "learning_rate": 0.00017323709126940018, "loss": 0.8552, "step": 72590 }, { "epoch": 1.2746010288101968, "grad_norm": 0.05594185927898379, "learning_rate": 0.00017322947077321382, "loss": 0.8591, "step": 72600 }, { "epoch": 1.2747765936901982, "grad_norm": 0.06823069283042688, "learning_rate": 0.00017322184936186617, "loss": 0.8543, "step": 72610 }, { "epoch": 1.2749521585701995, "grad_norm": 0.06239265888770375, "learning_rate": 0.0001732142270354539, "loss": 0.8454, "step": 72620 }, { "epoch": 1.275127723450201, "grad_norm": 0.05646378767194004, "learning_rate": 0.00017320660379407352, "loss": 0.8497, "step": 72630 }, { "epoch": 1.2753032883302025, "grad_norm": 0.07257632681849639, "learning_rate": 0.00017319897963782163, "loss": 0.8573, "step": 72640 }, { "epoch": 1.275478853210204, "grad_norm": 0.06949373634403548, "learning_rate": 0.00017319135456679482, "loss": 0.8564, "step": 72650 }, { "epoch": 1.2756544180902052, "grad_norm": 0.058246926931712614, "learning_rate": 0.0001731837285810897, "loss": 0.8509, "step": 72660 }, { "epoch": 1.2758299829702067, "grad_norm": 0.048165019902746284, "learning_rate": 0.00017317610168080295, "loss": 0.8526, "step": 72670 }, { "epoch": 1.276005547850208, "grad_norm": 0.05413299504504936, "learning_rate": 0.00017316847386603112, "loss": 0.8467, "step": 72680 }, { "epoch": 1.2761811127302094, "grad_norm": 0.053186351151287344, "learning_rate": 0.00017316084513687092, "loss": 0.8463, "step": 72690 }, { "epoch": 1.2763566776102109, "grad_norm": 0.06358401881461759, "learning_rate": 0.00017315321549341898, "loss": 0.8434, "step": 72700 }, { "epoch": 1.2765322424902124, "grad_norm": 0.04697959023258742, "learning_rate": 0.00017314558493577195, "loss": 0.8504, "step": 72710 }, { "epoch": 1.2767078073702136, "grad_norm": 0.051475151524720854, "learning_rate": 0.00017313795346402658, "loss": 0.8483, "step": 72720 }, { "epoch": 1.276883372250215, "grad_norm": 0.05742523074432333, "learning_rate": 0.00017313032107827943, "loss": 0.8504, "step": 72730 }, { "epoch": 1.2770589371302163, "grad_norm": 0.059494853202269486, "learning_rate": 0.0001731226877786274, "loss": 0.8404, "step": 72740 }, { "epoch": 1.2772345020102178, "grad_norm": 0.05955973719935818, "learning_rate": 0.00017311505356516703, "loss": 0.8465, "step": 72750 }, { "epoch": 1.2774100668902193, "grad_norm": 0.04999269014143875, "learning_rate": 0.00017310741843799506, "loss": 0.844, "step": 72760 }, { "epoch": 1.2775856317702208, "grad_norm": 0.08725427444858735, "learning_rate": 0.00017309978239720834, "loss": 0.8426, "step": 72770 }, { "epoch": 1.277761196650222, "grad_norm": 0.05707390072735316, "learning_rate": 0.00017309214544290359, "loss": 0.8459, "step": 72780 }, { "epoch": 1.2779367615302235, "grad_norm": 0.08681256765146149, "learning_rate": 0.0001730845075751775, "loss": 0.8452, "step": 72790 }, { "epoch": 1.2781123264102248, "grad_norm": 0.046286860785253243, "learning_rate": 0.00017307686879412687, "loss": 0.8398, "step": 72800 }, { "epoch": 1.2782878912902262, "grad_norm": 0.07048158332825585, "learning_rate": 0.0001730692290998485, "loss": 0.8554, "step": 72810 }, { "epoch": 1.2784634561702277, "grad_norm": 0.056448096353753065, "learning_rate": 0.0001730615884924392, "loss": 0.8426, "step": 72820 }, { "epoch": 1.2786390210502292, "grad_norm": 0.07271774448539563, "learning_rate": 0.0001730539469719958, "loss": 0.8472, "step": 72830 }, { "epoch": 1.2788145859302305, "grad_norm": 0.05857636678119472, "learning_rate": 0.00017304630453861503, "loss": 0.8471, "step": 72840 }, { "epoch": 1.278990150810232, "grad_norm": 0.060207472891722076, "learning_rate": 0.00017303866119239378, "loss": 0.8497, "step": 72850 }, { "epoch": 1.2791657156902334, "grad_norm": 0.05218615906913941, "learning_rate": 0.00017303101693342892, "loss": 0.852, "step": 72860 }, { "epoch": 1.2793412805702347, "grad_norm": 0.06048029677913812, "learning_rate": 0.00017302337176181725, "loss": 0.8475, "step": 72870 }, { "epoch": 1.2795168454502361, "grad_norm": 0.053816530674992105, "learning_rate": 0.00017301572567765566, "loss": 0.8484, "step": 72880 }, { "epoch": 1.2796924103302376, "grad_norm": 0.056669346164721905, "learning_rate": 0.00017300807868104098, "loss": 0.8592, "step": 72890 }, { "epoch": 1.2798679752102389, "grad_norm": 0.052883452869660376, "learning_rate": 0.0001730004307720702, "loss": 0.8472, "step": 72900 }, { "epoch": 1.2800435400902404, "grad_norm": 0.06702091291346095, "learning_rate": 0.00017299278195084014, "loss": 0.8503, "step": 72910 }, { "epoch": 1.2802191049702418, "grad_norm": 0.08140161519254802, "learning_rate": 0.00017298513221744775, "loss": 0.8521, "step": 72920 }, { "epoch": 1.280394669850243, "grad_norm": 0.08533559243539675, "learning_rate": 0.0001729774815719899, "loss": 0.8432, "step": 72930 }, { "epoch": 1.2805702347302446, "grad_norm": 0.10022293730734241, "learning_rate": 0.00017296983001456362, "loss": 0.8525, "step": 72940 }, { "epoch": 1.280745799610246, "grad_norm": 0.05012401481222619, "learning_rate": 0.0001729621775452658, "loss": 0.8546, "step": 72950 }, { "epoch": 1.2809213644902473, "grad_norm": 0.06816378768973584, "learning_rate": 0.00017295452416419334, "loss": 0.8538, "step": 72960 }, { "epoch": 1.2810969293702488, "grad_norm": 0.07413291488588591, "learning_rate": 0.00017294686987144332, "loss": 0.8462, "step": 72970 }, { "epoch": 1.2812724942502502, "grad_norm": 0.05150579050445229, "learning_rate": 0.00017293921466711264, "loss": 0.8515, "step": 72980 }, { "epoch": 1.2814480591302515, "grad_norm": 0.04975258485050058, "learning_rate": 0.00017293155855129831, "loss": 0.8571, "step": 72990 }, { "epoch": 1.281623624010253, "grad_norm": 0.06778766738961775, "learning_rate": 0.00017292390152409738, "loss": 0.8522, "step": 73000 }, { "epoch": 1.2817991888902545, "grad_norm": 0.05877493633006253, "learning_rate": 0.00017291624358560683, "loss": 0.8567, "step": 73010 }, { "epoch": 1.2819747537702557, "grad_norm": 0.05002161831591634, "learning_rate": 0.0001729085847359237, "loss": 0.8493, "step": 73020 }, { "epoch": 1.2821503186502572, "grad_norm": 0.08775193488987867, "learning_rate": 0.000172900924975145, "loss": 0.8501, "step": 73030 }, { "epoch": 1.2823258835302587, "grad_norm": 0.06104925594190535, "learning_rate": 0.00017289326430336782, "loss": 0.8476, "step": 73040 }, { "epoch": 1.28250144841026, "grad_norm": 0.061826210746164234, "learning_rate": 0.00017288560272068918, "loss": 0.8497, "step": 73050 }, { "epoch": 1.2826770132902614, "grad_norm": 0.08607715501946567, "learning_rate": 0.00017287794022720617, "loss": 0.848, "step": 73060 }, { "epoch": 1.2828525781702629, "grad_norm": 0.04693847877221865, "learning_rate": 0.00017287027682301592, "loss": 0.8453, "step": 73070 }, { "epoch": 1.2830281430502644, "grad_norm": 0.06252067545791616, "learning_rate": 0.00017286261250821548, "loss": 0.8455, "step": 73080 }, { "epoch": 1.2832037079302656, "grad_norm": 0.05886150603115208, "learning_rate": 0.00017285494728290192, "loss": 0.8519, "step": 73090 }, { "epoch": 1.283379272810267, "grad_norm": 0.055847853286046585, "learning_rate": 0.00017284728114717244, "loss": 0.8486, "step": 73100 }, { "epoch": 1.2835548376902683, "grad_norm": 0.07091610610890987, "learning_rate": 0.00017283961410112416, "loss": 0.8506, "step": 73110 }, { "epoch": 1.2837304025702698, "grad_norm": 0.045546295898004685, "learning_rate": 0.0001728319461448542, "loss": 0.8487, "step": 73120 }, { "epoch": 1.2839059674502713, "grad_norm": 0.07422576191386472, "learning_rate": 0.00017282427727845968, "loss": 0.8482, "step": 73130 }, { "epoch": 1.2840815323302728, "grad_norm": 0.08061921439310213, "learning_rate": 0.00017281660750203783, "loss": 0.8581, "step": 73140 }, { "epoch": 1.284257097210274, "grad_norm": 0.08552275451379841, "learning_rate": 0.00017280893681568578, "loss": 0.8428, "step": 73150 }, { "epoch": 1.2844326620902755, "grad_norm": 0.07054857493480682, "learning_rate": 0.00017280126521950075, "loss": 0.8555, "step": 73160 }, { "epoch": 1.2846082269702768, "grad_norm": 0.07914060809130374, "learning_rate": 0.00017279359271357993, "loss": 0.8462, "step": 73170 }, { "epoch": 1.2847837918502782, "grad_norm": 0.09986566742970417, "learning_rate": 0.00017278591929802053, "loss": 0.8462, "step": 73180 }, { "epoch": 1.2849593567302797, "grad_norm": 0.06263158823671339, "learning_rate": 0.00017277824497291978, "loss": 0.8507, "step": 73190 }, { "epoch": 1.2851349216102812, "grad_norm": 0.06134670374982775, "learning_rate": 0.0001727705697383749, "loss": 0.847, "step": 73200 }, { "epoch": 1.2853104864902825, "grad_norm": 0.05246838822047935, "learning_rate": 0.00017276289359448316, "loss": 0.8515, "step": 73210 }, { "epoch": 1.285486051370284, "grad_norm": 0.06058361048257272, "learning_rate": 0.00017275521654134176, "loss": 0.8544, "step": 73220 }, { "epoch": 1.2856616162502852, "grad_norm": 0.06304913951836562, "learning_rate": 0.00017274753857904806, "loss": 0.8473, "step": 73230 }, { "epoch": 1.2858371811302867, "grad_norm": 0.05657239081423363, "learning_rate": 0.00017273985970769928, "loss": 0.849, "step": 73240 }, { "epoch": 1.2860127460102881, "grad_norm": 0.06913965697659578, "learning_rate": 0.00017273217992739274, "loss": 0.8409, "step": 73250 }, { "epoch": 1.2861883108902896, "grad_norm": 0.062354743205830156, "learning_rate": 0.00017272449923822573, "loss": 0.8473, "step": 73260 }, { "epoch": 1.2863638757702909, "grad_norm": 0.04967282649331241, "learning_rate": 0.00017271681764029553, "loss": 0.848, "step": 73270 }, { "epoch": 1.2865394406502924, "grad_norm": 0.06033087017913372, "learning_rate": 0.00017270913513369956, "loss": 0.848, "step": 73280 }, { "epoch": 1.2867150055302936, "grad_norm": 0.05398420280585345, "learning_rate": 0.00017270145171853512, "loss": 0.8481, "step": 73290 }, { "epoch": 1.286890570410295, "grad_norm": 0.07126507686212222, "learning_rate": 0.0001726937673948995, "loss": 0.859, "step": 73300 }, { "epoch": 1.2870661352902966, "grad_norm": 0.05981011155717568, "learning_rate": 0.00017268608216289012, "loss": 0.8506, "step": 73310 }, { "epoch": 1.287241700170298, "grad_norm": 0.07538819519276833, "learning_rate": 0.0001726783960226043, "loss": 0.8496, "step": 73320 }, { "epoch": 1.2874172650502993, "grad_norm": 0.09359454331340025, "learning_rate": 0.00017267070897413953, "loss": 0.845, "step": 73330 }, { "epoch": 1.2875928299303008, "grad_norm": 0.07023328232191793, "learning_rate": 0.0001726630210175931, "loss": 0.8534, "step": 73340 }, { "epoch": 1.287768394810302, "grad_norm": 0.07183785923844149, "learning_rate": 0.00017265533215306248, "loss": 0.8474, "step": 73350 }, { "epoch": 1.2879439596903035, "grad_norm": 0.07464511369042902, "learning_rate": 0.00017264764238064507, "loss": 0.8501, "step": 73360 }, { "epoch": 1.288119524570305, "grad_norm": 0.06811377391890289, "learning_rate": 0.00017263995170043828, "loss": 0.8507, "step": 73370 }, { "epoch": 1.2882950894503065, "grad_norm": 0.05223193210588684, "learning_rate": 0.0001726322601125396, "loss": 0.8456, "step": 73380 }, { "epoch": 1.2884706543303077, "grad_norm": 0.05160386821395264, "learning_rate": 0.00017262456761704643, "loss": 0.8556, "step": 73390 }, { "epoch": 1.2886462192103092, "grad_norm": 0.0742316599104114, "learning_rate": 0.0001726168742140563, "loss": 0.8446, "step": 73400 }, { "epoch": 1.2888217840903105, "grad_norm": 0.060016081976680624, "learning_rate": 0.0001726091799036666, "loss": 0.8523, "step": 73410 }, { "epoch": 1.288997348970312, "grad_norm": 0.05805920348578582, "learning_rate": 0.0001726014846859749, "loss": 0.8575, "step": 73420 }, { "epoch": 1.2891729138503134, "grad_norm": 0.06521551254226224, "learning_rate": 0.00017259378856107865, "loss": 0.8463, "step": 73430 }, { "epoch": 1.2893484787303149, "grad_norm": 0.05966878519005171, "learning_rate": 0.00017258609152907538, "loss": 0.8482, "step": 73440 }, { "epoch": 1.2895240436103161, "grad_norm": 0.07252794059649711, "learning_rate": 0.00017257839359006261, "loss": 0.8512, "step": 73450 }, { "epoch": 1.2896996084903176, "grad_norm": 0.07472958201270627, "learning_rate": 0.00017257069474413788, "loss": 0.8449, "step": 73460 }, { "epoch": 1.2898751733703189, "grad_norm": 0.09011592848803353, "learning_rate": 0.00017256299499139873, "loss": 0.8432, "step": 73470 }, { "epoch": 1.2900507382503204, "grad_norm": 0.061540605353765376, "learning_rate": 0.00017255529433194273, "loss": 0.849, "step": 73480 }, { "epoch": 1.2902263031303218, "grad_norm": 0.07314905093762057, "learning_rate": 0.00017254759276586742, "loss": 0.8566, "step": 73490 }, { "epoch": 1.2904018680103233, "grad_norm": 0.09060770964520681, "learning_rate": 0.0001725398902932704, "loss": 0.8539, "step": 73500 }, { "epoch": 1.2905774328903246, "grad_norm": 0.0558176225167753, "learning_rate": 0.00017253218691424925, "loss": 0.8527, "step": 73510 }, { "epoch": 1.290752997770326, "grad_norm": 0.05762935425478981, "learning_rate": 0.00017252448262890162, "loss": 0.8569, "step": 73520 }, { "epoch": 1.2909285626503273, "grad_norm": 0.05911664024271116, "learning_rate": 0.00017251677743732505, "loss": 0.8374, "step": 73530 }, { "epoch": 1.2911041275303288, "grad_norm": 0.05571685001882873, "learning_rate": 0.00017250907133961724, "loss": 0.845, "step": 73540 }, { "epoch": 1.2912796924103302, "grad_norm": 0.05876544596606894, "learning_rate": 0.00017250136433587577, "loss": 0.854, "step": 73550 }, { "epoch": 1.2914552572903317, "grad_norm": 0.05886615259475377, "learning_rate": 0.0001724936564261983, "loss": 0.842, "step": 73560 }, { "epoch": 1.291630822170333, "grad_norm": 0.06155967664031375, "learning_rate": 0.0001724859476106825, "loss": 0.8504, "step": 73570 }, { "epoch": 1.2918063870503345, "grad_norm": 0.058761895654702606, "learning_rate": 0.00017247823788942604, "loss": 0.8483, "step": 73580 }, { "epoch": 1.291981951930336, "grad_norm": 0.06555737423044639, "learning_rate": 0.00017247052726252664, "loss": 0.8494, "step": 73590 }, { "epoch": 1.2921575168103372, "grad_norm": 0.04532774699752811, "learning_rate": 0.00017246281573008194, "loss": 0.8515, "step": 73600 }, { "epoch": 1.2923330816903387, "grad_norm": 0.059993960623318146, "learning_rate": 0.00017245510329218968, "loss": 0.8563, "step": 73610 }, { "epoch": 1.2925086465703401, "grad_norm": 0.06709992219917356, "learning_rate": 0.00017244738994894756, "loss": 0.8506, "step": 73620 }, { "epoch": 1.2926842114503414, "grad_norm": 0.04893265864291791, "learning_rate": 0.0001724396757004533, "loss": 0.8487, "step": 73630 }, { "epoch": 1.2928597763303429, "grad_norm": 0.06381086854309087, "learning_rate": 0.00017243196054680468, "loss": 0.8514, "step": 73640 }, { "epoch": 1.2930353412103444, "grad_norm": 0.08792255914605342, "learning_rate": 0.00017242424448809943, "loss": 0.8538, "step": 73650 }, { "epoch": 1.2932109060903456, "grad_norm": 0.06509480969464801, "learning_rate": 0.00017241652752443532, "loss": 0.8652, "step": 73660 }, { "epoch": 1.293386470970347, "grad_norm": 0.05452320234984717, "learning_rate": 0.00017240880965591009, "loss": 0.8486, "step": 73670 }, { "epoch": 1.2935620358503486, "grad_norm": 0.06175166784135403, "learning_rate": 0.00017240109088262156, "loss": 0.8506, "step": 73680 }, { "epoch": 1.2937376007303498, "grad_norm": 0.06977605276281898, "learning_rate": 0.00017239337120466758, "loss": 0.8449, "step": 73690 }, { "epoch": 1.2939131656103513, "grad_norm": 0.05296309382784827, "learning_rate": 0.00017238565062214584, "loss": 0.8501, "step": 73700 }, { "epoch": 1.2940887304903528, "grad_norm": 0.05579145797069548, "learning_rate": 0.00017237792913515424, "loss": 0.8534, "step": 73710 }, { "epoch": 1.294264295370354, "grad_norm": 0.12507311210278219, "learning_rate": 0.0001723702067437906, "loss": 0.8451, "step": 73720 }, { "epoch": 1.2944398602503555, "grad_norm": 0.060054124646705107, "learning_rate": 0.0001723624834481528, "loss": 0.8496, "step": 73730 }, { "epoch": 1.294615425130357, "grad_norm": 0.05347420571166263, "learning_rate": 0.00017235475924833862, "loss": 0.8537, "step": 73740 }, { "epoch": 1.2947909900103582, "grad_norm": 0.07621497645519654, "learning_rate": 0.00017234703414444595, "loss": 0.8493, "step": 73750 }, { "epoch": 1.2949665548903597, "grad_norm": 0.058149868082597075, "learning_rate": 0.00017233930813657274, "loss": 0.8531, "step": 73760 }, { "epoch": 1.2951421197703612, "grad_norm": 0.05304157838711208, "learning_rate": 0.0001723315812248168, "loss": 0.8537, "step": 73770 }, { "epoch": 1.2953176846503625, "grad_norm": 0.06409574173616833, "learning_rate": 0.00017232385340927602, "loss": 0.8457, "step": 73780 }, { "epoch": 1.295493249530364, "grad_norm": 0.05647404112808053, "learning_rate": 0.00017231612469004837, "loss": 0.8486, "step": 73790 }, { "epoch": 1.2956688144103654, "grad_norm": 0.08565247546534585, "learning_rate": 0.00017230839506723175, "loss": 0.8596, "step": 73800 }, { "epoch": 1.2958443792903669, "grad_norm": 0.07005167272926219, "learning_rate": 0.0001723006645409241, "loss": 0.8479, "step": 73810 }, { "epoch": 1.2960199441703681, "grad_norm": 0.0635759074902479, "learning_rate": 0.00017229293311122338, "loss": 0.8507, "step": 73820 }, { "epoch": 1.2961955090503696, "grad_norm": 0.06347821631926169, "learning_rate": 0.00017228520077822753, "loss": 0.8498, "step": 73830 }, { "epoch": 1.2963710739303709, "grad_norm": 0.05369291177329647, "learning_rate": 0.0001722774675420345, "loss": 0.8425, "step": 73840 }, { "epoch": 1.2965466388103724, "grad_norm": 0.06211198560294258, "learning_rate": 0.00017226973340274233, "loss": 0.8464, "step": 73850 }, { "epoch": 1.2967222036903738, "grad_norm": 0.048219313549484964, "learning_rate": 0.00017226199836044897, "loss": 0.8525, "step": 73860 }, { "epoch": 1.2968977685703753, "grad_norm": 0.048478708878928314, "learning_rate": 0.00017225426241525242, "loss": 0.8536, "step": 73870 }, { "epoch": 1.2970733334503766, "grad_norm": 0.07027740719019251, "learning_rate": 0.0001722465255672507, "loss": 0.8493, "step": 73880 }, { "epoch": 1.297248898330378, "grad_norm": 0.055289382373716216, "learning_rate": 0.0001722387878165419, "loss": 0.8527, "step": 73890 }, { "epoch": 1.2974244632103793, "grad_norm": 0.05887788820449988, "learning_rate": 0.00017223104916322393, "loss": 0.8527, "step": 73900 }, { "epoch": 1.2976000280903808, "grad_norm": 0.05288512733886304, "learning_rate": 0.00017222330960739497, "loss": 0.85, "step": 73910 }, { "epoch": 1.2977755929703823, "grad_norm": 0.06513423317048611, "learning_rate": 0.00017221556914915302, "loss": 0.8559, "step": 73920 }, { "epoch": 1.2979511578503837, "grad_norm": 0.07767494054580486, "learning_rate": 0.00017220782778859617, "loss": 0.8469, "step": 73930 }, { "epoch": 1.298126722730385, "grad_norm": 0.05318072931353284, "learning_rate": 0.00017220008552582244, "loss": 0.8468, "step": 73940 }, { "epoch": 1.2983022876103865, "grad_norm": 0.07121327122755636, "learning_rate": 0.00017219234236093002, "loss": 0.8536, "step": 73950 }, { "epoch": 1.2984778524903877, "grad_norm": 0.05510307045987649, "learning_rate": 0.00017218459829401695, "loss": 0.8507, "step": 73960 }, { "epoch": 1.2986534173703892, "grad_norm": 0.05341214117251008, "learning_rate": 0.00017217685332518144, "loss": 0.8429, "step": 73970 }, { "epoch": 1.2988289822503907, "grad_norm": 0.05512429632890281, "learning_rate": 0.00017216910745452153, "loss": 0.845, "step": 73980 }, { "epoch": 1.2990045471303922, "grad_norm": 0.05751227820346895, "learning_rate": 0.00017216136068213538, "loss": 0.8421, "step": 73990 }, { "epoch": 1.2991801120103934, "grad_norm": 0.05181629059815639, "learning_rate": 0.00017215361300812113, "loss": 0.8501, "step": 74000 }, { "epoch": 1.2993556768903949, "grad_norm": 0.06440408512565507, "learning_rate": 0.000172145864432577, "loss": 0.8518, "step": 74010 }, { "epoch": 1.2995312417703961, "grad_norm": 0.07565814868340986, "learning_rate": 0.0001721381149556011, "loss": 0.8451, "step": 74020 }, { "epoch": 1.2997068066503976, "grad_norm": 0.0555412818417386, "learning_rate": 0.00017213036457729166, "loss": 0.8531, "step": 74030 }, { "epoch": 1.299882371530399, "grad_norm": 0.051146012978933425, "learning_rate": 0.0001721226132977469, "loss": 0.8467, "step": 74040 }, { "epoch": 1.3000579364104006, "grad_norm": 0.05056515490857162, "learning_rate": 0.00017211486111706498, "loss": 0.8526, "step": 74050 }, { "epoch": 1.3002335012904018, "grad_norm": 0.05333324766522711, "learning_rate": 0.00017210710803534414, "loss": 0.8451, "step": 74060 }, { "epoch": 1.3004090661704033, "grad_norm": 0.06572787055006905, "learning_rate": 0.00017209935405268262, "loss": 0.8521, "step": 74070 }, { "epoch": 1.3005846310504046, "grad_norm": 0.07445121323941153, "learning_rate": 0.00017209159916917866, "loss": 0.853, "step": 74080 }, { "epoch": 1.300760195930406, "grad_norm": 0.0515126761945955, "learning_rate": 0.00017208384338493048, "loss": 0.8476, "step": 74090 }, { "epoch": 1.3009357608104075, "grad_norm": 0.06760225586881861, "learning_rate": 0.0001720760867000364, "loss": 0.8496, "step": 74100 }, { "epoch": 1.301111325690409, "grad_norm": 0.06060267836063854, "learning_rate": 0.00017206832911459473, "loss": 0.8423, "step": 74110 }, { "epoch": 1.3012868905704102, "grad_norm": 0.06290533002531122, "learning_rate": 0.00017206057062870363, "loss": 0.8495, "step": 74120 }, { "epoch": 1.3014624554504117, "grad_norm": 0.0557818686777932, "learning_rate": 0.00017205281124246154, "loss": 0.845, "step": 74130 }, { "epoch": 1.301638020330413, "grad_norm": 0.05243812395221059, "learning_rate": 0.00017204505095596667, "loss": 0.8472, "step": 74140 }, { "epoch": 1.3018135852104145, "grad_norm": 0.07226005132807674, "learning_rate": 0.00017203728976931742, "loss": 0.8559, "step": 74150 }, { "epoch": 1.301989150090416, "grad_norm": 0.049779600983327704, "learning_rate": 0.0001720295276826121, "loss": 0.853, "step": 74160 }, { "epoch": 1.3021647149704174, "grad_norm": 0.05018129822018999, "learning_rate": 0.00017202176469594902, "loss": 0.8415, "step": 74170 }, { "epoch": 1.3023402798504187, "grad_norm": 0.05006961274925179, "learning_rate": 0.0001720140008094266, "loss": 0.8442, "step": 74180 }, { "epoch": 1.3025158447304201, "grad_norm": 0.050518046628059234, "learning_rate": 0.00017200623602314316, "loss": 0.852, "step": 74190 }, { "epoch": 1.3026914096104214, "grad_norm": 0.053831005675684065, "learning_rate": 0.00017199847033719706, "loss": 0.8462, "step": 74200 }, { "epoch": 1.3028669744904229, "grad_norm": 0.05328959497845417, "learning_rate": 0.00017199070375168682, "loss": 0.8482, "step": 74210 }, { "epoch": 1.3030425393704244, "grad_norm": 0.0543331971644189, "learning_rate": 0.00017198293626671072, "loss": 0.852, "step": 74220 }, { "epoch": 1.3032181042504258, "grad_norm": 0.06351644917449928, "learning_rate": 0.0001719751678823672, "loss": 0.8529, "step": 74230 }, { "epoch": 1.303393669130427, "grad_norm": 0.0739957701330899, "learning_rate": 0.00017196739859875473, "loss": 0.8517, "step": 74240 }, { "epoch": 1.3035692340104286, "grad_norm": 0.11600927742609123, "learning_rate": 0.0001719596284159717, "loss": 0.8477, "step": 74250 }, { "epoch": 1.3037447988904298, "grad_norm": 0.05647655982025251, "learning_rate": 0.0001719518573341166, "loss": 0.8483, "step": 74260 }, { "epoch": 1.3039203637704313, "grad_norm": 0.0875380367113941, "learning_rate": 0.00017194408535328785, "loss": 0.8563, "step": 74270 }, { "epoch": 1.3040959286504328, "grad_norm": 0.05259583727774517, "learning_rate": 0.00017193631247358393, "loss": 0.8393, "step": 74280 }, { "epoch": 1.3042714935304343, "grad_norm": 0.06474860800886133, "learning_rate": 0.00017192853869510335, "loss": 0.847, "step": 74290 }, { "epoch": 1.3044470584104355, "grad_norm": 0.05376025196625303, "learning_rate": 0.00017192076401794458, "loss": 0.853, "step": 74300 }, { "epoch": 1.304622623290437, "grad_norm": 0.05812213251064769, "learning_rate": 0.00017191298844220613, "loss": 0.85, "step": 74310 }, { "epoch": 1.3047981881704385, "grad_norm": 0.05316708782884518, "learning_rate": 0.00017190521196798654, "loss": 0.852, "step": 74320 }, { "epoch": 1.3049737530504397, "grad_norm": 0.051911696992523716, "learning_rate": 0.00017189743459538438, "loss": 0.8536, "step": 74330 }, { "epoch": 1.3051493179304412, "grad_norm": 0.06109475599935442, "learning_rate": 0.00017188965632449808, "loss": 0.8442, "step": 74340 }, { "epoch": 1.3053248828104427, "grad_norm": 0.0532595383462383, "learning_rate": 0.00017188187715542626, "loss": 0.8441, "step": 74350 }, { "epoch": 1.305500447690444, "grad_norm": 0.04807762527855715, "learning_rate": 0.00017187409708826744, "loss": 0.8413, "step": 74360 }, { "epoch": 1.3056760125704454, "grad_norm": 0.07000787920486874, "learning_rate": 0.00017186631612312026, "loss": 0.8475, "step": 74370 }, { "epoch": 1.3058515774504469, "grad_norm": 0.08317030827402234, "learning_rate": 0.0001718585342600833, "loss": 0.8492, "step": 74380 }, { "epoch": 1.3060271423304481, "grad_norm": 0.06819193958522235, "learning_rate": 0.00017185075149925508, "loss": 0.8478, "step": 74390 }, { "epoch": 1.3062027072104496, "grad_norm": 0.06045399599317072, "learning_rate": 0.00017184296784073427, "loss": 0.853, "step": 74400 }, { "epoch": 1.306378272090451, "grad_norm": 0.0735309259029005, "learning_rate": 0.0001718351832846195, "loss": 0.8504, "step": 74410 }, { "epoch": 1.3065538369704524, "grad_norm": 0.059913019340722976, "learning_rate": 0.00017182739783100937, "loss": 0.8545, "step": 74420 }, { "epoch": 1.3067294018504538, "grad_norm": 0.059487900281193995, "learning_rate": 0.0001718196114800025, "loss": 0.8512, "step": 74430 }, { "epoch": 1.3069049667304553, "grad_norm": 0.0852715109939951, "learning_rate": 0.00017181182423169764, "loss": 0.8528, "step": 74440 }, { "epoch": 1.3070805316104566, "grad_norm": 0.056606057857806334, "learning_rate": 0.00017180403608619337, "loss": 0.8564, "step": 74450 }, { "epoch": 1.307256096490458, "grad_norm": 0.04634162129282192, "learning_rate": 0.00017179624704358838, "loss": 0.8525, "step": 74460 }, { "epoch": 1.3074316613704595, "grad_norm": 0.0719560642879591, "learning_rate": 0.0001717884571039814, "loss": 0.8596, "step": 74470 }, { "epoch": 1.3076072262504608, "grad_norm": 0.054642753056798464, "learning_rate": 0.00017178066626747108, "loss": 0.8462, "step": 74480 }, { "epoch": 1.3077827911304623, "grad_norm": 0.0637323641291427, "learning_rate": 0.0001717728745341562, "loss": 0.8486, "step": 74490 }, { "epoch": 1.3079583560104637, "grad_norm": 0.06443681402166967, "learning_rate": 0.00017176508190413538, "loss": 0.8508, "step": 74500 }, { "epoch": 1.308133920890465, "grad_norm": 0.06674804073158158, "learning_rate": 0.00017175728837750745, "loss": 0.8526, "step": 74510 }, { "epoch": 1.3083094857704665, "grad_norm": 0.07923480061522463, "learning_rate": 0.00017174949395437104, "loss": 0.85, "step": 74520 }, { "epoch": 1.308485050650468, "grad_norm": 0.04246419123334665, "learning_rate": 0.00017174169863482506, "loss": 0.8491, "step": 74530 }, { "epoch": 1.3086606155304694, "grad_norm": 0.057107736471411374, "learning_rate": 0.00017173390241896817, "loss": 0.8392, "step": 74540 }, { "epoch": 1.3088361804104707, "grad_norm": 0.06622786972117783, "learning_rate": 0.0001717261053068992, "loss": 0.8439, "step": 74550 }, { "epoch": 1.3090117452904722, "grad_norm": 0.06759733393305513, "learning_rate": 0.00017171830729871692, "loss": 0.8546, "step": 74560 }, { "epoch": 1.3091873101704734, "grad_norm": 0.06972073996836038, "learning_rate": 0.0001717105083945201, "loss": 0.8518, "step": 74570 }, { "epoch": 1.3093628750504749, "grad_norm": 0.051185568305431146, "learning_rate": 0.00017170270859440763, "loss": 0.8496, "step": 74580 }, { "epoch": 1.3095384399304764, "grad_norm": 0.04640943364806615, "learning_rate": 0.00017169490789847825, "loss": 0.8475, "step": 74590 }, { "epoch": 1.3097140048104778, "grad_norm": 0.05578177524568846, "learning_rate": 0.0001716871063068309, "loss": 0.8591, "step": 74600 }, { "epoch": 1.309889569690479, "grad_norm": 0.05531884254570576, "learning_rate": 0.0001716793038195643, "loss": 0.853, "step": 74610 }, { "epoch": 1.3100651345704806, "grad_norm": 0.052473297452198794, "learning_rate": 0.0001716715004367774, "loss": 0.8519, "step": 74620 }, { "epoch": 1.3102406994504818, "grad_norm": 0.05217579122796836, "learning_rate": 0.00017166369615856904, "loss": 0.8473, "step": 74630 }, { "epoch": 1.3104162643304833, "grad_norm": 0.07882802352372614, "learning_rate": 0.0001716558909850381, "loss": 0.8511, "step": 74640 }, { "epoch": 1.3105918292104848, "grad_norm": 0.08054906828775832, "learning_rate": 0.0001716480849162835, "loss": 0.8565, "step": 74650 }, { "epoch": 1.3107673940904863, "grad_norm": 0.06554469989297952, "learning_rate": 0.00017164027795240412, "loss": 0.8484, "step": 74660 }, { "epoch": 1.3109429589704875, "grad_norm": 0.07383733306434374, "learning_rate": 0.0001716324700934989, "loss": 0.8552, "step": 74670 }, { "epoch": 1.311118523850489, "grad_norm": 0.04464345561945777, "learning_rate": 0.00017162466133966673, "loss": 0.8493, "step": 74680 }, { "epoch": 1.3112940887304902, "grad_norm": 0.06419453283806215, "learning_rate": 0.00017161685169100654, "loss": 0.8524, "step": 74690 }, { "epoch": 1.3114696536104917, "grad_norm": 0.06694940559983877, "learning_rate": 0.00017160904114761733, "loss": 0.8476, "step": 74700 }, { "epoch": 1.3116452184904932, "grad_norm": 0.061020155870225586, "learning_rate": 0.00017160122970959804, "loss": 0.8439, "step": 74710 }, { "epoch": 1.3118207833704947, "grad_norm": 0.07065104924649951, "learning_rate": 0.0001715934173770476, "loss": 0.8518, "step": 74720 }, { "epoch": 1.311996348250496, "grad_norm": 0.06635263438364587, "learning_rate": 0.0001715856041500651, "loss": 0.8382, "step": 74730 }, { "epoch": 1.3121719131304974, "grad_norm": 0.05733698818132237, "learning_rate": 0.00017157779002874947, "loss": 0.8475, "step": 74740 }, { "epoch": 1.3123474780104987, "grad_norm": 0.05668802372944086, "learning_rate": 0.00017156997501319962, "loss": 0.8468, "step": 74750 }, { "epoch": 1.3125230428905001, "grad_norm": 0.05880841725375793, "learning_rate": 0.00017156215910351473, "loss": 0.8488, "step": 74760 }, { "epoch": 1.3126986077705016, "grad_norm": 0.05541042146455401, "learning_rate": 0.00017155434229979374, "loss": 0.8497, "step": 74770 }, { "epoch": 1.312874172650503, "grad_norm": 0.07463297570049932, "learning_rate": 0.00017154652460213573, "loss": 0.8469, "step": 74780 }, { "epoch": 1.3130497375305044, "grad_norm": 0.059560277062893206, "learning_rate": 0.00017153870601063973, "loss": 0.8482, "step": 74790 }, { "epoch": 1.3132253024105058, "grad_norm": 0.0664243643520026, "learning_rate": 0.0001715308865254048, "loss": 0.8455, "step": 74800 }, { "epoch": 1.313400867290507, "grad_norm": 0.06735038849521446, "learning_rate": 0.00017152306614653002, "loss": 0.8541, "step": 74810 }, { "epoch": 1.3135764321705086, "grad_norm": 0.059154348481150545, "learning_rate": 0.0001715152448741145, "loss": 0.8442, "step": 74820 }, { "epoch": 1.31375199705051, "grad_norm": 0.06541746199554971, "learning_rate": 0.00017150742270825728, "loss": 0.8446, "step": 74830 }, { "epoch": 1.3139275619305115, "grad_norm": 0.07729713643211511, "learning_rate": 0.00017149959964905752, "loss": 0.8511, "step": 74840 }, { "epoch": 1.3141031268105128, "grad_norm": 0.05765986082939206, "learning_rate": 0.0001714917756966143, "loss": 0.8454, "step": 74850 }, { "epoch": 1.3142786916905143, "grad_norm": 0.0461168926784104, "learning_rate": 0.00017148395085102677, "loss": 0.8576, "step": 74860 }, { "epoch": 1.3144542565705155, "grad_norm": 0.06452079813430821, "learning_rate": 0.0001714761251123941, "loss": 0.8522, "step": 74870 }, { "epoch": 1.314629821450517, "grad_norm": 0.06020677007407519, "learning_rate": 0.00017146829848081537, "loss": 0.8531, "step": 74880 }, { "epoch": 1.3148053863305185, "grad_norm": 0.06988162760331364, "learning_rate": 0.00017146047095638987, "loss": 0.8482, "step": 74890 }, { "epoch": 1.31498095121052, "grad_norm": 0.052769930881010174, "learning_rate": 0.00017145264253921665, "loss": 0.859, "step": 74900 }, { "epoch": 1.3151565160905212, "grad_norm": 0.059686027340659864, "learning_rate": 0.00017144481322939496, "loss": 0.8516, "step": 74910 }, { "epoch": 1.3153320809705227, "grad_norm": 0.0569589783238258, "learning_rate": 0.00017143698302702398, "loss": 0.855, "step": 74920 }, { "epoch": 1.315507645850524, "grad_norm": 0.051733064706925035, "learning_rate": 0.00017142915193220292, "loss": 0.8518, "step": 74930 }, { "epoch": 1.3156832107305254, "grad_norm": 0.06082229339635138, "learning_rate": 0.00017142131994503102, "loss": 0.8503, "step": 74940 }, { "epoch": 1.3158587756105269, "grad_norm": 0.05468611888989276, "learning_rate": 0.00017141348706560753, "loss": 0.8449, "step": 74950 }, { "epoch": 1.3160343404905284, "grad_norm": 0.06370364292548603, "learning_rate": 0.0001714056532940316, "loss": 0.8546, "step": 74960 }, { "epoch": 1.3162099053705296, "grad_norm": 0.04507859516052771, "learning_rate": 0.0001713978186304026, "loss": 0.8576, "step": 74970 }, { "epoch": 1.316385470250531, "grad_norm": 0.06583453293104122, "learning_rate": 0.0001713899830748197, "loss": 0.8513, "step": 74980 }, { "epoch": 1.3165610351305324, "grad_norm": 0.08481209795720843, "learning_rate": 0.00017138214662738225, "loss": 0.8483, "step": 74990 }, { "epoch": 1.3167366000105338, "grad_norm": 0.047362788274666195, "learning_rate": 0.0001713743092881895, "loss": 0.8577, "step": 75000 }, { "epoch": 1.3169121648905353, "grad_norm": 0.06892597456723117, "learning_rate": 0.0001713664710573408, "loss": 0.8482, "step": 75010 }, { "epoch": 1.3170877297705368, "grad_norm": 0.06139764551286314, "learning_rate": 0.00017135863193493543, "loss": 0.8475, "step": 75020 }, { "epoch": 1.317263294650538, "grad_norm": 0.06137213407156039, "learning_rate": 0.00017135079192107265, "loss": 0.8531, "step": 75030 }, { "epoch": 1.3174388595305395, "grad_norm": 0.060681317134714405, "learning_rate": 0.00017134295101585193, "loss": 0.8551, "step": 75040 }, { "epoch": 1.317614424410541, "grad_norm": 0.06958993944619012, "learning_rate": 0.00017133510921937245, "loss": 0.8512, "step": 75050 }, { "epoch": 1.3177899892905423, "grad_norm": 0.07822673505143969, "learning_rate": 0.00017132726653173373, "loss": 0.8472, "step": 75060 }, { "epoch": 1.3179655541705437, "grad_norm": 0.05362696573438668, "learning_rate": 0.00017131942295303503, "loss": 0.8389, "step": 75070 }, { "epoch": 1.3181411190505452, "grad_norm": 0.055315832950675686, "learning_rate": 0.00017131157848337576, "loss": 0.8456, "step": 75080 }, { "epoch": 1.3183166839305465, "grad_norm": 0.08263795549808108, "learning_rate": 0.00017130373312285535, "loss": 0.8508, "step": 75090 }, { "epoch": 1.318492248810548, "grad_norm": 0.0895446594918832, "learning_rate": 0.0001712958868715731, "loss": 0.8472, "step": 75100 }, { "epoch": 1.3186678136905494, "grad_norm": 0.06606916190707574, "learning_rate": 0.00017128803972962857, "loss": 0.8459, "step": 75110 }, { "epoch": 1.3188433785705507, "grad_norm": 0.0848749110091261, "learning_rate": 0.00017128019169712107, "loss": 0.8511, "step": 75120 }, { "epoch": 1.3190189434505522, "grad_norm": 0.08260538880269408, "learning_rate": 0.00017127234277415008, "loss": 0.8494, "step": 75130 }, { "epoch": 1.3191945083305536, "grad_norm": 0.07969226823167924, "learning_rate": 0.00017126449296081504, "loss": 0.8458, "step": 75140 }, { "epoch": 1.3193700732105549, "grad_norm": 0.06702989113783846, "learning_rate": 0.0001712566422572154, "loss": 0.8447, "step": 75150 }, { "epoch": 1.3195456380905564, "grad_norm": 0.055160250417652934, "learning_rate": 0.00017124879066345063, "loss": 0.8485, "step": 75160 }, { "epoch": 1.3197212029705578, "grad_norm": 0.04815133197641721, "learning_rate": 0.00017124093817962025, "loss": 0.8431, "step": 75170 }, { "epoch": 1.319896767850559, "grad_norm": 0.050148633970874046, "learning_rate": 0.00017123308480582368, "loss": 0.8567, "step": 75180 }, { "epoch": 1.3200723327305606, "grad_norm": 0.05325793859302122, "learning_rate": 0.00017122523054216048, "loss": 0.8415, "step": 75190 }, { "epoch": 1.320247897610562, "grad_norm": 0.05880171259868474, "learning_rate": 0.00017121737538873018, "loss": 0.8428, "step": 75200 }, { "epoch": 1.3204234624905635, "grad_norm": 0.056787487794800445, "learning_rate": 0.00017120951934563222, "loss": 0.8486, "step": 75210 }, { "epoch": 1.3205990273705648, "grad_norm": 0.0840854038656484, "learning_rate": 0.00017120166241296625, "loss": 0.8499, "step": 75220 }, { "epoch": 1.3207745922505663, "grad_norm": 0.05880948558264276, "learning_rate": 0.00017119380459083174, "loss": 0.8421, "step": 75230 }, { "epoch": 1.3209501571305675, "grad_norm": 0.062192642466828416, "learning_rate": 0.00017118594587932828, "loss": 0.8529, "step": 75240 }, { "epoch": 1.321125722010569, "grad_norm": 0.0837300327757396, "learning_rate": 0.00017117808627855542, "loss": 0.8553, "step": 75250 }, { "epoch": 1.3213012868905705, "grad_norm": 0.05001919923252674, "learning_rate": 0.00017117022578861277, "loss": 0.8481, "step": 75260 }, { "epoch": 1.321476851770572, "grad_norm": 0.05389523525669501, "learning_rate": 0.0001711623644095999, "loss": 0.8526, "step": 75270 }, { "epoch": 1.3216524166505732, "grad_norm": 0.05312733765982867, "learning_rate": 0.00017115450214161645, "loss": 0.8541, "step": 75280 }, { "epoch": 1.3218279815305747, "grad_norm": 0.06685575979572611, "learning_rate": 0.00017114663898476197, "loss": 0.8379, "step": 75290 }, { "epoch": 1.322003546410576, "grad_norm": 0.08327092701884586, "learning_rate": 0.0001711387749391362, "loss": 0.8511, "step": 75300 }, { "epoch": 1.3221791112905774, "grad_norm": 0.050788046450746485, "learning_rate": 0.00017113091000483865, "loss": 0.8426, "step": 75310 }, { "epoch": 1.322354676170579, "grad_norm": 0.05635260165337142, "learning_rate": 0.00017112304418196906, "loss": 0.8463, "step": 75320 }, { "epoch": 1.3225302410505804, "grad_norm": 0.07633224465194655, "learning_rate": 0.00017111517747062704, "loss": 0.844, "step": 75330 }, { "epoch": 1.3227058059305816, "grad_norm": 0.051350956252422836, "learning_rate": 0.00017110730987091228, "loss": 0.8432, "step": 75340 }, { "epoch": 1.322881370810583, "grad_norm": 0.06620301750755238, "learning_rate": 0.00017109944138292448, "loss": 0.8494, "step": 75350 }, { "epoch": 1.3230569356905844, "grad_norm": 0.048625935971782004, "learning_rate": 0.0001710915720067633, "loss": 0.8541, "step": 75360 }, { "epoch": 1.3232325005705858, "grad_norm": 0.052302443800912896, "learning_rate": 0.0001710837017425285, "loss": 0.8492, "step": 75370 }, { "epoch": 1.3234080654505873, "grad_norm": 0.05221721603430902, "learning_rate": 0.00017107583059031975, "loss": 0.853, "step": 75380 }, { "epoch": 1.3235836303305888, "grad_norm": 0.05744516061685842, "learning_rate": 0.00017106795855023683, "loss": 0.8523, "step": 75390 }, { "epoch": 1.32375919521059, "grad_norm": 0.06223374103100518, "learning_rate": 0.00017106008562237942, "loss": 0.8433, "step": 75400 }, { "epoch": 1.3239347600905915, "grad_norm": 0.0452022893483144, "learning_rate": 0.0001710522118068473, "loss": 0.8495, "step": 75410 }, { "epoch": 1.3241103249705928, "grad_norm": 0.06349290288444069, "learning_rate": 0.0001710443371037402, "loss": 0.8539, "step": 75420 }, { "epoch": 1.3242858898505943, "grad_norm": 0.06189853596805175, "learning_rate": 0.000171036461513158, "loss": 0.8586, "step": 75430 }, { "epoch": 1.3244614547305957, "grad_norm": 0.06463421676971393, "learning_rate": 0.00017102858503520037, "loss": 0.8507, "step": 75440 }, { "epoch": 1.3246370196105972, "grad_norm": 0.07225162696460125, "learning_rate": 0.00017102070766996714, "loss": 0.8546, "step": 75450 }, { "epoch": 1.3248125844905985, "grad_norm": 0.05373625263840432, "learning_rate": 0.0001710128294175581, "loss": 0.846, "step": 75460 }, { "epoch": 1.3249881493706, "grad_norm": 0.08647898200511674, "learning_rate": 0.00017100495027807313, "loss": 0.8524, "step": 75470 }, { "epoch": 1.3251637142506012, "grad_norm": 0.07449623536984011, "learning_rate": 0.00017099707025161203, "loss": 0.8567, "step": 75480 }, { "epoch": 1.3253392791306027, "grad_norm": 0.047321317332670074, "learning_rate": 0.0001709891893382746, "loss": 0.8454, "step": 75490 }, { "epoch": 1.3255148440106042, "grad_norm": 0.07446498584258145, "learning_rate": 0.00017098130753816074, "loss": 0.8476, "step": 75500 }, { "epoch": 1.3256904088906056, "grad_norm": 0.07777551511468009, "learning_rate": 0.00017097342485137033, "loss": 0.8497, "step": 75510 }, { "epoch": 1.3258659737706069, "grad_norm": 0.06325480480226522, "learning_rate": 0.00017096554127800319, "loss": 0.8462, "step": 75520 }, { "epoch": 1.3260415386506084, "grad_norm": 0.058233305011368905, "learning_rate": 0.00017095765681815925, "loss": 0.8536, "step": 75530 }, { "epoch": 1.3262171035306096, "grad_norm": 0.06733164301119036, "learning_rate": 0.00017094977147193838, "loss": 0.8516, "step": 75540 }, { "epoch": 1.326392668410611, "grad_norm": 0.054694509559854367, "learning_rate": 0.00017094188523944054, "loss": 0.8554, "step": 75550 }, { "epoch": 1.3265682332906126, "grad_norm": 0.06916118651118602, "learning_rate": 0.00017093399812076555, "loss": 0.8455, "step": 75560 }, { "epoch": 1.326743798170614, "grad_norm": 0.06442436998713348, "learning_rate": 0.0001709261101160134, "loss": 0.8509, "step": 75570 }, { "epoch": 1.3269193630506153, "grad_norm": 0.059234086415499675, "learning_rate": 0.00017091822122528404, "loss": 0.8469, "step": 75580 }, { "epoch": 1.3270949279306168, "grad_norm": 0.06093072134988412, "learning_rate": 0.00017091033144867745, "loss": 0.8518, "step": 75590 }, { "epoch": 1.327270492810618, "grad_norm": 0.08349800293961679, "learning_rate": 0.00017090244078629357, "loss": 0.8504, "step": 75600 }, { "epoch": 1.3274460576906195, "grad_norm": 0.07547420938459602, "learning_rate": 0.0001708945492382323, "loss": 0.8456, "step": 75610 }, { "epoch": 1.327621622570621, "grad_norm": 0.06560657899751278, "learning_rate": 0.00017088665680459376, "loss": 0.8436, "step": 75620 }, { "epoch": 1.3277971874506225, "grad_norm": 0.05321532587251813, "learning_rate": 0.00017087876348547787, "loss": 0.8514, "step": 75630 }, { "epoch": 1.3279727523306237, "grad_norm": 0.05100014425663975, "learning_rate": 0.00017087086928098462, "loss": 0.8519, "step": 75640 }, { "epoch": 1.3281483172106252, "grad_norm": 0.05090774867351129, "learning_rate": 0.0001708629741912141, "loss": 0.8481, "step": 75650 }, { "epoch": 1.3283238820906265, "grad_norm": 0.06856145094219757, "learning_rate": 0.0001708550782162663, "loss": 0.8471, "step": 75660 }, { "epoch": 1.328499446970628, "grad_norm": 0.06877078546870233, "learning_rate": 0.0001708471813562413, "loss": 0.8523, "step": 75670 }, { "epoch": 1.3286750118506294, "grad_norm": 0.05707509016034957, "learning_rate": 0.0001708392836112391, "loss": 0.8493, "step": 75680 }, { "epoch": 1.328850576730631, "grad_norm": 0.06363413630117916, "learning_rate": 0.00017083138498135976, "loss": 0.8437, "step": 75690 }, { "epoch": 1.3290261416106322, "grad_norm": 0.07496775475136136, "learning_rate": 0.00017082348546670345, "loss": 0.8468, "step": 75700 }, { "epoch": 1.3292017064906336, "grad_norm": 0.062188008819777936, "learning_rate": 0.00017081558506737014, "loss": 0.8507, "step": 75710 }, { "epoch": 1.3293772713706349, "grad_norm": 0.053570335550356896, "learning_rate": 0.00017080768378346, "loss": 0.8438, "step": 75720 }, { "epoch": 1.3295528362506364, "grad_norm": 0.0562176269983682, "learning_rate": 0.00017079978161507316, "loss": 0.8563, "step": 75730 }, { "epoch": 1.3297284011306378, "grad_norm": 0.06544673077535408, "learning_rate": 0.0001707918785623097, "loss": 0.8517, "step": 75740 }, { "epoch": 1.3299039660106393, "grad_norm": 0.04711214661359434, "learning_rate": 0.00017078397462526974, "loss": 0.8537, "step": 75750 }, { "epoch": 1.3300795308906406, "grad_norm": 0.07359694691080831, "learning_rate": 0.00017077606980405344, "loss": 0.851, "step": 75760 }, { "epoch": 1.330255095770642, "grad_norm": 0.06915889132716112, "learning_rate": 0.00017076816409876102, "loss": 0.8495, "step": 75770 }, { "epoch": 1.3304306606506435, "grad_norm": 0.052207577610965276, "learning_rate": 0.00017076025750949257, "loss": 0.8478, "step": 75780 }, { "epoch": 1.3306062255306448, "grad_norm": 0.07479050941318063, "learning_rate": 0.00017075235003634828, "loss": 0.8476, "step": 75790 }, { "epoch": 1.3307817904106463, "grad_norm": 0.08587781839816346, "learning_rate": 0.00017074444167942832, "loss": 0.8444, "step": 75800 }, { "epoch": 1.3309573552906477, "grad_norm": 0.05309213701256835, "learning_rate": 0.00017073653243883293, "loss": 0.8486, "step": 75810 }, { "epoch": 1.331132920170649, "grad_norm": 0.07615311936783091, "learning_rate": 0.00017072862231466232, "loss": 0.8437, "step": 75820 }, { "epoch": 1.3313084850506505, "grad_norm": 0.045941045711390203, "learning_rate": 0.0001707207113070167, "loss": 0.8433, "step": 75830 }, { "epoch": 1.331484049930652, "grad_norm": 0.04574417788801244, "learning_rate": 0.00017071279941599632, "loss": 0.8503, "step": 75840 }, { "epoch": 1.3316596148106532, "grad_norm": 0.07005790519832959, "learning_rate": 0.0001707048866417014, "loss": 0.8543, "step": 75850 }, { "epoch": 1.3318351796906547, "grad_norm": 0.07006032718967621, "learning_rate": 0.00017069697298423219, "loss": 0.851, "step": 75860 }, { "epoch": 1.3320107445706562, "grad_norm": 0.046020134988948413, "learning_rate": 0.00017068905844368895, "loss": 0.841, "step": 75870 }, { "epoch": 1.3321863094506574, "grad_norm": 0.05012177538714212, "learning_rate": 0.00017068114302017204, "loss": 0.851, "step": 75880 }, { "epoch": 1.332361874330659, "grad_norm": 0.05983242716382999, "learning_rate": 0.0001706732267137817, "loss": 0.8534, "step": 75890 }, { "epoch": 1.3325374392106604, "grad_norm": 0.06127554639818103, "learning_rate": 0.00017066530952461817, "loss": 0.8505, "step": 75900 }, { "epoch": 1.3327130040906616, "grad_norm": 0.08113342718931067, "learning_rate": 0.00017065739145278187, "loss": 0.8499, "step": 75910 }, { "epoch": 1.332888568970663, "grad_norm": 0.07558165263718662, "learning_rate": 0.00017064947249837304, "loss": 0.8444, "step": 75920 }, { "epoch": 1.3330641338506646, "grad_norm": 0.08325143110054631, "learning_rate": 0.00017064155266149203, "loss": 0.8467, "step": 75930 }, { "epoch": 1.333239698730666, "grad_norm": 0.07038032390775617, "learning_rate": 0.00017063363194223924, "loss": 0.8571, "step": 75940 }, { "epoch": 1.3334152636106673, "grad_norm": 0.05445567527150817, "learning_rate": 0.00017062571034071497, "loss": 0.8475, "step": 75950 }, { "epoch": 1.3335908284906688, "grad_norm": 0.055884008247775695, "learning_rate": 0.0001706177878570196, "loss": 0.8545, "step": 75960 }, { "epoch": 1.33376639337067, "grad_norm": 0.06862368011146021, "learning_rate": 0.00017060986449125352, "loss": 0.8534, "step": 75970 }, { "epoch": 1.3339419582506715, "grad_norm": 0.0601197366141486, "learning_rate": 0.00017060194024351712, "loss": 0.8465, "step": 75980 }, { "epoch": 1.334117523130673, "grad_norm": 0.08184159845634237, "learning_rate": 0.00017059401511391081, "loss": 0.8517, "step": 75990 }, { "epoch": 1.3342930880106745, "grad_norm": 0.0466538337877709, "learning_rate": 0.00017058608910253497, "loss": 0.854, "step": 76000 }, { "epoch": 1.3344686528906757, "grad_norm": 0.06019853392624011, "learning_rate": 0.00017057816220949007, "loss": 0.8513, "step": 76010 }, { "epoch": 1.3346442177706772, "grad_norm": 0.07320400159744017, "learning_rate": 0.0001705702344348765, "loss": 0.8393, "step": 76020 }, { "epoch": 1.3348197826506785, "grad_norm": 0.05348825942682483, "learning_rate": 0.00017056230577879472, "loss": 0.8528, "step": 76030 }, { "epoch": 1.33499534753068, "grad_norm": 0.08115482382279772, "learning_rate": 0.00017055437624134522, "loss": 0.8513, "step": 76040 }, { "epoch": 1.3351709124106814, "grad_norm": 0.049519905566244936, "learning_rate": 0.0001705464458226284, "loss": 0.8481, "step": 76050 }, { "epoch": 1.335346477290683, "grad_norm": 0.0648811274252808, "learning_rate": 0.00017053851452274483, "loss": 0.8534, "step": 76060 }, { "epoch": 1.3355220421706842, "grad_norm": 0.05307638407914913, "learning_rate": 0.00017053058234179494, "loss": 0.8474, "step": 76070 }, { "epoch": 1.3356976070506856, "grad_norm": 0.04967201707323457, "learning_rate": 0.00017052264927987923, "loss": 0.8473, "step": 76080 }, { "epoch": 1.3358731719306869, "grad_norm": 0.058447173382868076, "learning_rate": 0.00017051471533709823, "loss": 0.8485, "step": 76090 }, { "epoch": 1.3360487368106884, "grad_norm": 0.04825286360093728, "learning_rate": 0.00017050678051355248, "loss": 0.8506, "step": 76100 }, { "epoch": 1.3362243016906898, "grad_norm": 0.0647530000961546, "learning_rate": 0.00017049884480934254, "loss": 0.8497, "step": 76110 }, { "epoch": 1.3363998665706913, "grad_norm": 0.06337974076826894, "learning_rate": 0.00017049090822456882, "loss": 0.8419, "step": 76120 }, { "epoch": 1.3365754314506926, "grad_norm": 0.06425865116558935, "learning_rate": 0.00017048297075933206, "loss": 0.8467, "step": 76130 }, { "epoch": 1.336750996330694, "grad_norm": 0.06962871000513726, "learning_rate": 0.0001704750324137327, "loss": 0.8437, "step": 76140 }, { "epoch": 1.3369265612106953, "grad_norm": 0.04534623110728847, "learning_rate": 0.00017046709318787137, "loss": 0.856, "step": 76150 }, { "epoch": 1.3371021260906968, "grad_norm": 0.07571862872538243, "learning_rate": 0.00017045915308184866, "loss": 0.8434, "step": 76160 }, { "epoch": 1.3372776909706983, "grad_norm": 0.07328025247303306, "learning_rate": 0.00017045121209576517, "loss": 0.8558, "step": 76170 }, { "epoch": 1.3374532558506997, "grad_norm": 0.07297489631849996, "learning_rate": 0.00017044327022972151, "loss": 0.8459, "step": 76180 }, { "epoch": 1.337628820730701, "grad_norm": 0.05811021033922465, "learning_rate": 0.0001704353274838183, "loss": 0.8572, "step": 76190 }, { "epoch": 1.3378043856107025, "grad_norm": 0.08842322468962704, "learning_rate": 0.00017042738385815618, "loss": 0.8466, "step": 76200 }, { "epoch": 1.3379799504907037, "grad_norm": 0.04836864938520365, "learning_rate": 0.00017041943935283585, "loss": 0.8539, "step": 76210 }, { "epoch": 1.3381555153707052, "grad_norm": 0.04419167066462444, "learning_rate": 0.00017041149396795788, "loss": 0.8431, "step": 76220 }, { "epoch": 1.3383310802507067, "grad_norm": 0.05199425936771737, "learning_rate": 0.000170403547703623, "loss": 0.8484, "step": 76230 }, { "epoch": 1.3385066451307082, "grad_norm": 0.06446656781100452, "learning_rate": 0.00017039560055993183, "loss": 0.8612, "step": 76240 }, { "epoch": 1.3386822100107094, "grad_norm": 0.05556019184811688, "learning_rate": 0.00017038765253698512, "loss": 0.8442, "step": 76250 }, { "epoch": 1.338857774890711, "grad_norm": 0.05739764597300805, "learning_rate": 0.00017037970363488353, "loss": 0.8486, "step": 76260 }, { "epoch": 1.3390333397707121, "grad_norm": 0.076713731443272, "learning_rate": 0.00017037175385372787, "loss": 0.8604, "step": 76270 }, { "epoch": 1.3392089046507136, "grad_norm": 0.06518968457612513, "learning_rate": 0.00017036380319361875, "loss": 0.8434, "step": 76280 }, { "epoch": 1.339384469530715, "grad_norm": 0.06619447323858677, "learning_rate": 0.00017035585165465692, "loss": 0.8555, "step": 76290 }, { "epoch": 1.3395600344107166, "grad_norm": 0.0703916621682637, "learning_rate": 0.00017034789923694318, "loss": 0.8362, "step": 76300 }, { "epoch": 1.3397355992907178, "grad_norm": 0.08242011474274821, "learning_rate": 0.00017033994594057826, "loss": 0.8523, "step": 76310 }, { "epoch": 1.3399111641707193, "grad_norm": 0.04911882165450667, "learning_rate": 0.00017033199176566297, "loss": 0.8432, "step": 76320 }, { "epoch": 1.3400867290507206, "grad_norm": 0.07464336878599503, "learning_rate": 0.00017032403671229801, "loss": 0.8481, "step": 76330 }, { "epoch": 1.340262293930722, "grad_norm": 0.05820566205058757, "learning_rate": 0.00017031608078058426, "loss": 0.8501, "step": 76340 }, { "epoch": 1.3404378588107235, "grad_norm": 0.053707217514421195, "learning_rate": 0.00017030812397062251, "loss": 0.8466, "step": 76350 }, { "epoch": 1.340613423690725, "grad_norm": 0.05837142204913718, "learning_rate": 0.0001703001662825135, "loss": 0.8491, "step": 76360 }, { "epoch": 1.3407889885707263, "grad_norm": 0.04841375908626427, "learning_rate": 0.00017029220771635809, "loss": 0.8416, "step": 76370 }, { "epoch": 1.3409645534507277, "grad_norm": 0.05223501913056539, "learning_rate": 0.00017028424827225712, "loss": 0.8463, "step": 76380 }, { "epoch": 1.341140118330729, "grad_norm": 0.061536248868039635, "learning_rate": 0.00017027628795031148, "loss": 0.8488, "step": 76390 }, { "epoch": 1.3413156832107305, "grad_norm": 0.05147271100695557, "learning_rate": 0.00017026832675062198, "loss": 0.8509, "step": 76400 }, { "epoch": 1.341491248090732, "grad_norm": 0.061417981128669426, "learning_rate": 0.0001702603646732895, "loss": 0.8498, "step": 76410 }, { "epoch": 1.3416668129707334, "grad_norm": 0.05057510122620781, "learning_rate": 0.00017025240171841496, "loss": 0.8509, "step": 76420 }, { "epoch": 1.3418423778507347, "grad_norm": 0.06339593600690849, "learning_rate": 0.00017024443788609918, "loss": 0.846, "step": 76430 }, { "epoch": 1.3420179427307362, "grad_norm": 0.05580376138723276, "learning_rate": 0.0001702364731764431, "loss": 0.8422, "step": 76440 }, { "epoch": 1.3421935076107374, "grad_norm": 0.05375265090117153, "learning_rate": 0.00017022850758954767, "loss": 0.8519, "step": 76450 }, { "epoch": 1.342369072490739, "grad_norm": 0.09189003927237982, "learning_rate": 0.0001702205411255138, "loss": 0.85, "step": 76460 }, { "epoch": 1.3425446373707404, "grad_norm": 0.058164689467708613, "learning_rate": 0.00017021257378444237, "loss": 0.8461, "step": 76470 }, { "epoch": 1.3427202022507418, "grad_norm": 0.05274709850988405, "learning_rate": 0.00017020460556643432, "loss": 0.8459, "step": 76480 }, { "epoch": 1.342895767130743, "grad_norm": 0.0932106435330047, "learning_rate": 0.00017019663647159072, "loss": 0.8537, "step": 76490 }, { "epoch": 1.3430713320107446, "grad_norm": 0.06748810469488133, "learning_rate": 0.00017018866650001245, "loss": 0.8567, "step": 76500 }, { "epoch": 1.343246896890746, "grad_norm": 0.06573077822024427, "learning_rate": 0.00017018069565180051, "loss": 0.8522, "step": 76510 }, { "epoch": 1.3434224617707473, "grad_norm": 0.05828187177767365, "learning_rate": 0.0001701727239270559, "loss": 0.8544, "step": 76520 }, { "epoch": 1.3435980266507488, "grad_norm": 0.05428087472777464, "learning_rate": 0.00017016475132587965, "loss": 0.8576, "step": 76530 }, { "epoch": 1.3437735915307503, "grad_norm": 0.062060826918511655, "learning_rate": 0.0001701567778483727, "loss": 0.8516, "step": 76540 }, { "epoch": 1.3439491564107515, "grad_norm": 0.058556008923173905, "learning_rate": 0.00017014880349463615, "loss": 0.8566, "step": 76550 }, { "epoch": 1.344124721290753, "grad_norm": 0.08347779485169887, "learning_rate": 0.000170140828264771, "loss": 0.8527, "step": 76560 }, { "epoch": 1.3443002861707545, "grad_norm": 0.054339431134023335, "learning_rate": 0.0001701328521588783, "loss": 0.8497, "step": 76570 }, { "epoch": 1.3444758510507557, "grad_norm": 0.053783094953709996, "learning_rate": 0.00017012487517705912, "loss": 0.8544, "step": 76580 }, { "epoch": 1.3446514159307572, "grad_norm": 0.06633683960824416, "learning_rate": 0.00017011689731941454, "loss": 0.845, "step": 76590 }, { "epoch": 1.3448269808107587, "grad_norm": 0.09120141373478864, "learning_rate": 0.00017010891858604564, "loss": 0.8556, "step": 76600 }, { "epoch": 1.34500254569076, "grad_norm": 0.08675100934244405, "learning_rate": 0.00017010093897705343, "loss": 0.8544, "step": 76610 }, { "epoch": 1.3451781105707614, "grad_norm": 0.059662188467441195, "learning_rate": 0.00017009295849253913, "loss": 0.8487, "step": 76620 }, { "epoch": 1.345353675450763, "grad_norm": 0.05913960394102192, "learning_rate": 0.00017008497713260379, "loss": 0.8443, "step": 76630 }, { "epoch": 1.3455292403307642, "grad_norm": 0.0652673010964782, "learning_rate": 0.00017007699489734855, "loss": 0.8508, "step": 76640 }, { "epoch": 1.3457048052107656, "grad_norm": 0.0540638926478725, "learning_rate": 0.00017006901178687455, "loss": 0.8484, "step": 76650 }, { "epoch": 1.345880370090767, "grad_norm": 0.061786724790206055, "learning_rate": 0.00017006102780128292, "loss": 0.8521, "step": 76660 }, { "epoch": 1.3460559349707686, "grad_norm": 0.06185661148687561, "learning_rate": 0.00017005304294067484, "loss": 0.8521, "step": 76670 }, { "epoch": 1.3462314998507698, "grad_norm": 0.05986047037967302, "learning_rate": 0.00017004505720515147, "loss": 0.8517, "step": 76680 }, { "epoch": 1.3464070647307713, "grad_norm": 0.07367067556060511, "learning_rate": 0.000170037070594814, "loss": 0.8488, "step": 76690 }, { "epoch": 1.3465826296107726, "grad_norm": 0.05538555228845806, "learning_rate": 0.00017002908310976363, "loss": 0.8531, "step": 76700 }, { "epoch": 1.346758194490774, "grad_norm": 0.08513335301801978, "learning_rate": 0.0001700210947501015, "loss": 0.8465, "step": 76710 }, { "epoch": 1.3469337593707755, "grad_norm": 0.07167744426166522, "learning_rate": 0.00017001310551592892, "loss": 0.8431, "step": 76720 }, { "epoch": 1.347109324250777, "grad_norm": 0.06990942488104501, "learning_rate": 0.00017000511540734705, "loss": 0.8449, "step": 76730 }, { "epoch": 1.3472848891307783, "grad_norm": 0.06523161773889964, "learning_rate": 0.00016999712442445714, "loss": 0.849, "step": 76740 }, { "epoch": 1.3474604540107797, "grad_norm": 0.06150755775330128, "learning_rate": 0.0001699891325673604, "loss": 0.8494, "step": 76750 }, { "epoch": 1.347636018890781, "grad_norm": 0.04897897083963353, "learning_rate": 0.0001699811398361582, "loss": 0.8453, "step": 76760 }, { "epoch": 1.3478115837707825, "grad_norm": 0.060374938222906314, "learning_rate": 0.0001699731462309517, "loss": 0.8457, "step": 76770 }, { "epoch": 1.347987148650784, "grad_norm": 0.04244892279966634, "learning_rate": 0.00016996515175184222, "loss": 0.8408, "step": 76780 }, { "epoch": 1.3481627135307854, "grad_norm": 0.04124124520783707, "learning_rate": 0.0001699571563989311, "loss": 0.844, "step": 76790 }, { "epoch": 1.3483382784107867, "grad_norm": 0.05397345545417103, "learning_rate": 0.00016994916017231952, "loss": 0.8478, "step": 76800 }, { "epoch": 1.3485138432907882, "grad_norm": 0.055716279968680754, "learning_rate": 0.0001699411630721089, "loss": 0.8558, "step": 76810 }, { "epoch": 1.3486894081707894, "grad_norm": 0.04534451655700431, "learning_rate": 0.00016993316509840054, "loss": 0.8563, "step": 76820 }, { "epoch": 1.348864973050791, "grad_norm": 0.04965604611393762, "learning_rate": 0.00016992516625129574, "loss": 0.8428, "step": 76830 }, { "epoch": 1.3490405379307924, "grad_norm": 0.06390435864421967, "learning_rate": 0.00016991716653089588, "loss": 0.8467, "step": 76840 }, { "epoch": 1.3492161028107938, "grad_norm": 0.07046953863498778, "learning_rate": 0.00016990916593730233, "loss": 0.8493, "step": 76850 }, { "epoch": 1.349391667690795, "grad_norm": 0.07536902552960514, "learning_rate": 0.00016990116447061646, "loss": 0.8448, "step": 76860 }, { "epoch": 1.3495672325707966, "grad_norm": 0.09092531567056232, "learning_rate": 0.0001698931621309396, "loss": 0.8498, "step": 76870 }, { "epoch": 1.3497427974507978, "grad_norm": 0.07832628141652447, "learning_rate": 0.00016988515891837316, "loss": 0.8502, "step": 76880 }, { "epoch": 1.3499183623307993, "grad_norm": 0.0708606066921496, "learning_rate": 0.00016987715483301859, "loss": 0.8545, "step": 76890 }, { "epoch": 1.3500939272108008, "grad_norm": 0.06673619377781896, "learning_rate": 0.0001698691498749772, "loss": 0.8548, "step": 76900 }, { "epoch": 1.3502694920908023, "grad_norm": 0.06747472718020812, "learning_rate": 0.00016986114404435058, "loss": 0.8512, "step": 76910 }, { "epoch": 1.3504450569708035, "grad_norm": 0.07651785789599742, "learning_rate": 0.00016985313734124, "loss": 0.8458, "step": 76920 }, { "epoch": 1.350620621850805, "grad_norm": 0.055338728097640316, "learning_rate": 0.000169845129765747, "loss": 0.8463, "step": 76930 }, { "epoch": 1.3507961867308063, "grad_norm": 0.053294112399295705, "learning_rate": 0.000169837121317973, "loss": 0.8455, "step": 76940 }, { "epoch": 1.3509717516108077, "grad_norm": 0.08854519767401693, "learning_rate": 0.00016982911199801948, "loss": 0.8436, "step": 76950 }, { "epoch": 1.3511473164908092, "grad_norm": 0.05125014610352081, "learning_rate": 0.00016982110180598791, "loss": 0.8439, "step": 76960 }, { "epoch": 1.3513228813708107, "grad_norm": 0.06225603316685289, "learning_rate": 0.00016981309074197984, "loss": 0.8512, "step": 76970 }, { "epoch": 1.351498446250812, "grad_norm": 0.055202896452329814, "learning_rate": 0.0001698050788060967, "loss": 0.8444, "step": 76980 }, { "epoch": 1.3516740111308134, "grad_norm": 0.06599033296567273, "learning_rate": 0.00016979706599844, "loss": 0.8534, "step": 76990 }, { "epoch": 1.3518495760108147, "grad_norm": 0.07064819781330606, "learning_rate": 0.00016978905231911125, "loss": 0.8524, "step": 77000 }, { "epoch": 1.3520251408908162, "grad_norm": 0.0956589708059054, "learning_rate": 0.0001697810377682121, "loss": 0.853, "step": 77010 }, { "epoch": 1.3522007057708176, "grad_norm": 0.05510510695801866, "learning_rate": 0.00016977302234584402, "loss": 0.8455, "step": 77020 }, { "epoch": 1.3523762706508191, "grad_norm": 0.07306430700051864, "learning_rate": 0.00016976500605210855, "loss": 0.8453, "step": 77030 }, { "epoch": 1.3525518355308204, "grad_norm": 0.07095901523344574, "learning_rate": 0.00016975698888710726, "loss": 0.85, "step": 77040 }, { "epoch": 1.3527274004108218, "grad_norm": 0.04826415536486713, "learning_rate": 0.00016974897085094174, "loss": 0.8409, "step": 77050 }, { "epoch": 1.352902965290823, "grad_norm": 0.04802517430766489, "learning_rate": 0.00016974095194371356, "loss": 0.8453, "step": 77060 }, { "epoch": 1.3530785301708246, "grad_norm": 0.05923635639300022, "learning_rate": 0.00016973293216552442, "loss": 0.8486, "step": 77070 }, { "epoch": 1.353254095050826, "grad_norm": 0.05342970221156565, "learning_rate": 0.0001697249115164758, "loss": 0.8414, "step": 77080 }, { "epoch": 1.3534296599308275, "grad_norm": 0.06680001977592652, "learning_rate": 0.0001697168899966694, "loss": 0.8519, "step": 77090 }, { "epoch": 1.3536052248108288, "grad_norm": 0.05140791246912955, "learning_rate": 0.00016970886760620684, "loss": 0.852, "step": 77100 }, { "epoch": 1.3537807896908303, "grad_norm": 0.06006985797212989, "learning_rate": 0.00016970084434518974, "loss": 0.8497, "step": 77110 }, { "epoch": 1.3539563545708315, "grad_norm": 0.05360580503725353, "learning_rate": 0.00016969282021371977, "loss": 0.8502, "step": 77120 }, { "epoch": 1.354131919450833, "grad_norm": 0.05295761543253292, "learning_rate": 0.00016968479521189863, "loss": 0.8512, "step": 77130 }, { "epoch": 1.3543074843308345, "grad_norm": 0.06180507383384494, "learning_rate": 0.00016967676933982793, "loss": 0.8529, "step": 77140 }, { "epoch": 1.354483049210836, "grad_norm": 0.06834841656389164, "learning_rate": 0.00016966874259760948, "loss": 0.8515, "step": 77150 }, { "epoch": 1.3546586140908372, "grad_norm": 0.05407450520433601, "learning_rate": 0.00016966071498534483, "loss": 0.8448, "step": 77160 }, { "epoch": 1.3548341789708387, "grad_norm": 0.061715643650045636, "learning_rate": 0.00016965268650313579, "loss": 0.8486, "step": 77170 }, { "epoch": 1.35500974385084, "grad_norm": 0.06352405207264442, "learning_rate": 0.00016964465715108403, "loss": 0.8441, "step": 77180 }, { "epoch": 1.3551853087308414, "grad_norm": 0.06811848508400585, "learning_rate": 0.00016963662692929133, "loss": 0.8472, "step": 77190 }, { "epoch": 1.355360873610843, "grad_norm": 0.060795810569747075, "learning_rate": 0.00016962859583785942, "loss": 0.851, "step": 77200 }, { "epoch": 1.3555364384908444, "grad_norm": 0.05265867131524301, "learning_rate": 0.00016962056387689005, "loss": 0.8485, "step": 77210 }, { "epoch": 1.3557120033708456, "grad_norm": 0.053652829295225364, "learning_rate": 0.00016961253104648495, "loss": 0.8505, "step": 77220 }, { "epoch": 1.355887568250847, "grad_norm": 0.05556272225996789, "learning_rate": 0.00016960449734674596, "loss": 0.8475, "step": 77230 }, { "epoch": 1.3560631331308486, "grad_norm": 0.07485061696827737, "learning_rate": 0.00016959646277777484, "loss": 0.8563, "step": 77240 }, { "epoch": 1.3562386980108498, "grad_norm": 0.06761099736457528, "learning_rate": 0.00016958842733967336, "loss": 0.8507, "step": 77250 }, { "epoch": 1.3564142628908513, "grad_norm": 0.051963337824891855, "learning_rate": 0.0001695803910325434, "loss": 0.8478, "step": 77260 }, { "epoch": 1.3565898277708528, "grad_norm": 0.10217879464544954, "learning_rate": 0.00016957235385648672, "loss": 0.8619, "step": 77270 }, { "epoch": 1.356765392650854, "grad_norm": 0.06463708975984911, "learning_rate": 0.0001695643158116052, "loss": 0.8501, "step": 77280 }, { "epoch": 1.3569409575308555, "grad_norm": 0.08990834349000446, "learning_rate": 0.00016955627689800064, "loss": 0.8363, "step": 77290 }, { "epoch": 1.357116522410857, "grad_norm": 0.05534726517735422, "learning_rate": 0.00016954823711577493, "loss": 0.8519, "step": 77300 }, { "epoch": 1.3572920872908583, "grad_norm": 0.05613175695742679, "learning_rate": 0.00016954019646502988, "loss": 0.849, "step": 77310 }, { "epoch": 1.3574676521708597, "grad_norm": 0.05925591893250529, "learning_rate": 0.00016953215494586743, "loss": 0.851, "step": 77320 }, { "epoch": 1.3576432170508612, "grad_norm": 0.06729646428060317, "learning_rate": 0.00016952411255838942, "loss": 0.8575, "step": 77330 }, { "epoch": 1.3578187819308625, "grad_norm": 0.05078430710988355, "learning_rate": 0.00016951606930269778, "loss": 0.8511, "step": 77340 }, { "epoch": 1.357994346810864, "grad_norm": 0.07373589279231717, "learning_rate": 0.00016950802517889444, "loss": 0.8377, "step": 77350 }, { "epoch": 1.3581699116908654, "grad_norm": 0.06603925198537992, "learning_rate": 0.00016949998018708128, "loss": 0.8584, "step": 77360 }, { "epoch": 1.3583454765708667, "grad_norm": 0.08102534341847584, "learning_rate": 0.00016949193432736027, "loss": 0.85, "step": 77370 }, { "epoch": 1.3585210414508682, "grad_norm": 0.0785996467269745, "learning_rate": 0.00016948388759983326, "loss": 0.8411, "step": 77380 }, { "epoch": 1.3586966063308696, "grad_norm": 0.06094132329848468, "learning_rate": 0.0001694758400046023, "loss": 0.8522, "step": 77390 }, { "epoch": 1.3588721712108711, "grad_norm": 0.0748571035400303, "learning_rate": 0.00016946779154176933, "loss": 0.8527, "step": 77400 }, { "epoch": 1.3590477360908724, "grad_norm": 0.06438524377058397, "learning_rate": 0.00016945974221143633, "loss": 0.8455, "step": 77410 }, { "epoch": 1.3592233009708738, "grad_norm": 0.06643832692718347, "learning_rate": 0.00016945169201370525, "loss": 0.8494, "step": 77420 }, { "epoch": 1.359398865850875, "grad_norm": 0.047677903334280375, "learning_rate": 0.0001694436409486781, "loss": 0.842, "step": 77430 }, { "epoch": 1.3595744307308766, "grad_norm": 0.08023700700260031, "learning_rate": 0.00016943558901645693, "loss": 0.8549, "step": 77440 }, { "epoch": 1.359749995610878, "grad_norm": 0.059473165097926946, "learning_rate": 0.00016942753621714373, "loss": 0.8559, "step": 77450 }, { "epoch": 1.3599255604908795, "grad_norm": 0.052929993658826606, "learning_rate": 0.0001694194825508405, "loss": 0.8446, "step": 77460 }, { "epoch": 1.3601011253708808, "grad_norm": 0.057286133003299204, "learning_rate": 0.0001694114280176494, "loss": 0.8513, "step": 77470 }, { "epoch": 1.3602766902508823, "grad_norm": 0.055338717447643675, "learning_rate": 0.00016940337261767233, "loss": 0.8504, "step": 77480 }, { "epoch": 1.3604522551308835, "grad_norm": 0.05214022713582364, "learning_rate": 0.00016939531635101144, "loss": 0.8475, "step": 77490 }, { "epoch": 1.360627820010885, "grad_norm": 0.058114927216248245, "learning_rate": 0.00016938725921776873, "loss": 0.8438, "step": 77500 }, { "epoch": 1.3608033848908865, "grad_norm": 0.0796204862589523, "learning_rate": 0.0001693792012180464, "loss": 0.8532, "step": 77510 }, { "epoch": 1.360978949770888, "grad_norm": 0.07219669887346077, "learning_rate": 0.00016937114235194648, "loss": 0.8552, "step": 77520 }, { "epoch": 1.3611545146508892, "grad_norm": 0.06239055390052521, "learning_rate": 0.00016936308261957105, "loss": 0.8476, "step": 77530 }, { "epoch": 1.3613300795308907, "grad_norm": 0.07264147397367343, "learning_rate": 0.0001693550220210223, "loss": 0.8512, "step": 77540 }, { "epoch": 1.361505644410892, "grad_norm": 0.056825252765985046, "learning_rate": 0.0001693469605564023, "loss": 0.8491, "step": 77550 }, { "epoch": 1.3616812092908934, "grad_norm": 0.06613718278915062, "learning_rate": 0.00016933889822581322, "loss": 0.8527, "step": 77560 }, { "epoch": 1.361856774170895, "grad_norm": 0.07038071868797745, "learning_rate": 0.0001693308350293572, "loss": 0.8523, "step": 77570 }, { "epoch": 1.3620323390508964, "grad_norm": 0.04839875297650354, "learning_rate": 0.0001693227709671364, "loss": 0.8437, "step": 77580 }, { "epoch": 1.3622079039308976, "grad_norm": 0.06047923139230421, "learning_rate": 0.000169314706039253, "loss": 0.8576, "step": 77590 }, { "epoch": 1.3623834688108991, "grad_norm": 0.050889296224315955, "learning_rate": 0.00016930664024580918, "loss": 0.8499, "step": 77600 }, { "epoch": 1.3625590336909004, "grad_norm": 0.06252619914740223, "learning_rate": 0.00016929857358690716, "loss": 0.8558, "step": 77610 }, { "epoch": 1.3627345985709018, "grad_norm": 0.057443839018566936, "learning_rate": 0.00016929050606264906, "loss": 0.842, "step": 77620 }, { "epoch": 1.3629101634509033, "grad_norm": 0.05245679647704677, "learning_rate": 0.00016928243767313722, "loss": 0.8497, "step": 77630 }, { "epoch": 1.3630857283309048, "grad_norm": 0.06112343864472478, "learning_rate": 0.0001692743684184738, "loss": 0.8422, "step": 77640 }, { "epoch": 1.363261293210906, "grad_norm": 0.05505272390493824, "learning_rate": 0.00016926629829876102, "loss": 0.8505, "step": 77650 }, { "epoch": 1.3634368580909075, "grad_norm": 0.06408644970896067, "learning_rate": 0.0001692582273141012, "loss": 0.8559, "step": 77660 }, { "epoch": 1.3636124229709088, "grad_norm": 0.07729432633629606, "learning_rate": 0.0001692501554645965, "loss": 0.8413, "step": 77670 }, { "epoch": 1.3637879878509103, "grad_norm": 0.05923501133100925, "learning_rate": 0.0001692420827503493, "loss": 0.8503, "step": 77680 }, { "epoch": 1.3639635527309117, "grad_norm": 0.05254781564202942, "learning_rate": 0.0001692340091714618, "loss": 0.8569, "step": 77690 }, { "epoch": 1.3641391176109132, "grad_norm": 0.057203043334860544, "learning_rate": 0.00016922593472803635, "loss": 0.8534, "step": 77700 }, { "epoch": 1.3643146824909145, "grad_norm": 0.06154618807076067, "learning_rate": 0.00016921785942017523, "loss": 0.8557, "step": 77710 }, { "epoch": 1.364490247370916, "grad_norm": 0.06322550942302864, "learning_rate": 0.00016920978324798077, "loss": 0.8476, "step": 77720 }, { "epoch": 1.3646658122509172, "grad_norm": 0.07080704219863468, "learning_rate": 0.00016920170621155525, "loss": 0.8476, "step": 77730 }, { "epoch": 1.3648413771309187, "grad_norm": 0.048027057295499005, "learning_rate": 0.00016919362831100106, "loss": 0.8446, "step": 77740 }, { "epoch": 1.3650169420109202, "grad_norm": 0.059745663028626675, "learning_rate": 0.00016918554954642052, "loss": 0.8475, "step": 77750 }, { "epoch": 1.3651925068909216, "grad_norm": 0.07279628264267145, "learning_rate": 0.00016917746991791603, "loss": 0.8425, "step": 77760 }, { "epoch": 1.365368071770923, "grad_norm": 0.06011711753051195, "learning_rate": 0.00016916938942558989, "loss": 0.8437, "step": 77770 }, { "epoch": 1.3655436366509244, "grad_norm": 0.046114164270699774, "learning_rate": 0.00016916130806954457, "loss": 0.8472, "step": 77780 }, { "epoch": 1.3657192015309256, "grad_norm": 0.04727456792351673, "learning_rate": 0.00016915322584988235, "loss": 0.8506, "step": 77790 }, { "epoch": 1.365894766410927, "grad_norm": 0.053380966402414826, "learning_rate": 0.00016914514276670573, "loss": 0.848, "step": 77800 }, { "epoch": 1.3660703312909286, "grad_norm": 0.04401024504474506, "learning_rate": 0.0001691370588201171, "loss": 0.8504, "step": 77810 }, { "epoch": 1.36624589617093, "grad_norm": 0.061258762389955375, "learning_rate": 0.00016912897401021886, "loss": 0.8605, "step": 77820 }, { "epoch": 1.3664214610509313, "grad_norm": 0.0801380915407228, "learning_rate": 0.00016912088833711348, "loss": 0.8492, "step": 77830 }, { "epoch": 1.3665970259309328, "grad_norm": 0.08717321426198449, "learning_rate": 0.00016911280180090336, "loss": 0.8471, "step": 77840 }, { "epoch": 1.366772590810934, "grad_norm": 0.0702749143402834, "learning_rate": 0.000169104714401691, "loss": 0.8498, "step": 77850 }, { "epoch": 1.3669481556909355, "grad_norm": 0.05121893247806179, "learning_rate": 0.0001690966261395789, "loss": 0.8426, "step": 77860 }, { "epoch": 1.367123720570937, "grad_norm": 0.058579309775073814, "learning_rate": 0.00016908853701466946, "loss": 0.8515, "step": 77870 }, { "epoch": 1.3672992854509385, "grad_norm": 0.07211005277037046, "learning_rate": 0.00016908044702706519, "loss": 0.8533, "step": 77880 }, { "epoch": 1.3674748503309397, "grad_norm": 0.05607432962212349, "learning_rate": 0.00016907235617686864, "loss": 0.8525, "step": 77890 }, { "epoch": 1.3676504152109412, "grad_norm": 0.03983366930430906, "learning_rate": 0.00016906426446418226, "loss": 0.865, "step": 77900 }, { "epoch": 1.3678259800909425, "grad_norm": 0.03989745499269955, "learning_rate": 0.0001690561718891086, "loss": 0.8474, "step": 77910 }, { "epoch": 1.368001544970944, "grad_norm": 0.05726013084809209, "learning_rate": 0.00016904807845175023, "loss": 0.8484, "step": 77920 }, { "epoch": 1.3681771098509454, "grad_norm": 0.06414630025558843, "learning_rate": 0.00016903998415220966, "loss": 0.8485, "step": 77930 }, { "epoch": 1.368352674730947, "grad_norm": 0.06340501838985042, "learning_rate": 0.0001690318889905894, "loss": 0.8609, "step": 77940 }, { "epoch": 1.3685282396109482, "grad_norm": 0.04502514093642912, "learning_rate": 0.00016902379296699214, "loss": 0.8593, "step": 77950 }, { "epoch": 1.3687038044909496, "grad_norm": 0.05270780997437195, "learning_rate": 0.0001690156960815204, "loss": 0.8452, "step": 77960 }, { "epoch": 1.3688793693709511, "grad_norm": 0.07734057258630019, "learning_rate": 0.00016900759833427667, "loss": 0.8523, "step": 77970 }, { "epoch": 1.3690549342509524, "grad_norm": 0.06193493007177415, "learning_rate": 0.00016899949972536368, "loss": 0.8425, "step": 77980 }, { "epoch": 1.3692304991309538, "grad_norm": 0.06543834801697972, "learning_rate": 0.00016899140025488401, "loss": 0.8493, "step": 77990 }, { "epoch": 1.3694060640109553, "grad_norm": 0.055738484044190864, "learning_rate": 0.00016898329992294026, "loss": 0.857, "step": 78000 }, { "epoch": 1.3695816288909566, "grad_norm": 0.06086867350241575, "learning_rate": 0.00016897519872963507, "loss": 0.8393, "step": 78010 }, { "epoch": 1.369757193770958, "grad_norm": 0.06379661553479445, "learning_rate": 0.00016896709667507106, "loss": 0.8549, "step": 78020 }, { "epoch": 1.3699327586509595, "grad_norm": 0.05916035511416921, "learning_rate": 0.00016895899375935094, "loss": 0.8481, "step": 78030 }, { "epoch": 1.3701083235309608, "grad_norm": 0.050570802817114256, "learning_rate": 0.00016895088998257737, "loss": 0.8455, "step": 78040 }, { "epoch": 1.3702838884109623, "grad_norm": 0.04760793778088413, "learning_rate": 0.00016894278534485295, "loss": 0.8553, "step": 78050 }, { "epoch": 1.3704594532909637, "grad_norm": 0.057781949836057285, "learning_rate": 0.00016893467984628043, "loss": 0.843, "step": 78060 }, { "epoch": 1.370635018170965, "grad_norm": 0.054975585063359124, "learning_rate": 0.00016892657348696252, "loss": 0.842, "step": 78070 }, { "epoch": 1.3708105830509665, "grad_norm": 0.06868483194039612, "learning_rate": 0.00016891846626700187, "loss": 0.8416, "step": 78080 }, { "epoch": 1.370986147930968, "grad_norm": 0.058961101175575036, "learning_rate": 0.00016891035818650125, "loss": 0.854, "step": 78090 }, { "epoch": 1.3711617128109692, "grad_norm": 0.051174857190877714, "learning_rate": 0.0001689022492455634, "loss": 0.8546, "step": 78100 }, { "epoch": 1.3713372776909707, "grad_norm": 0.0449259419604081, "learning_rate": 0.00016889413944429097, "loss": 0.8496, "step": 78110 }, { "epoch": 1.3715128425709722, "grad_norm": 0.06023783209176246, "learning_rate": 0.00016888602878278687, "loss": 0.8432, "step": 78120 }, { "epoch": 1.3716884074509736, "grad_norm": 0.10721629460883414, "learning_rate": 0.00016887791726115373, "loss": 0.8478, "step": 78130 }, { "epoch": 1.371863972330975, "grad_norm": 0.05366890699207747, "learning_rate": 0.00016886980487949437, "loss": 0.8482, "step": 78140 }, { "epoch": 1.3720395372109764, "grad_norm": 0.07949020266171807, "learning_rate": 0.0001688616916379116, "loss": 0.8515, "step": 78150 }, { "epoch": 1.3722151020909776, "grad_norm": 0.051404069276899556, "learning_rate": 0.00016885357753650818, "loss": 0.8468, "step": 78160 }, { "epoch": 1.3723906669709791, "grad_norm": 0.06708239869208785, "learning_rate": 0.0001688454625753869, "loss": 0.8508, "step": 78170 }, { "epoch": 1.3725662318509806, "grad_norm": 0.06432576374776744, "learning_rate": 0.00016883734675465062, "loss": 0.8493, "step": 78180 }, { "epoch": 1.372741796730982, "grad_norm": 0.06073231608675784, "learning_rate": 0.00016882923007440216, "loss": 0.8518, "step": 78190 }, { "epoch": 1.3729173616109833, "grad_norm": 0.06037276622624786, "learning_rate": 0.00016882111253474434, "loss": 0.8518, "step": 78200 }, { "epoch": 1.3730929264909848, "grad_norm": 0.07632626935142091, "learning_rate": 0.00016881299413578005, "loss": 0.8504, "step": 78210 }, { "epoch": 1.373268491370986, "grad_norm": 0.06086222655743199, "learning_rate": 0.00016880487487761213, "loss": 0.8518, "step": 78220 }, { "epoch": 1.3734440562509875, "grad_norm": 0.05813675036862018, "learning_rate": 0.00016879675476034347, "loss": 0.8603, "step": 78230 }, { "epoch": 1.373619621130989, "grad_norm": 0.06269187032370047, "learning_rate": 0.0001687886337840769, "loss": 0.8489, "step": 78240 }, { "epoch": 1.3737951860109905, "grad_norm": 0.05974458192877722, "learning_rate": 0.00016878051194891533, "loss": 0.8491, "step": 78250 }, { "epoch": 1.3739707508909917, "grad_norm": 0.05404477027612948, "learning_rate": 0.00016877238925496174, "loss": 0.8513, "step": 78260 }, { "epoch": 1.3741463157709932, "grad_norm": 0.06894611829684023, "learning_rate": 0.00016876426570231894, "loss": 0.8436, "step": 78270 }, { "epoch": 1.3743218806509945, "grad_norm": 0.05591530488017604, "learning_rate": 0.0001687561412910899, "loss": 0.8459, "step": 78280 }, { "epoch": 1.374497445530996, "grad_norm": 0.059524053249916935, "learning_rate": 0.0001687480160213776, "loss": 0.8462, "step": 78290 }, { "epoch": 1.3746730104109974, "grad_norm": 0.0535092167454499, "learning_rate": 0.00016873988989328495, "loss": 0.8475, "step": 78300 }, { "epoch": 1.374848575290999, "grad_norm": 0.05926627046483781, "learning_rate": 0.00016873176290691487, "loss": 0.8472, "step": 78310 }, { "epoch": 1.3750241401710002, "grad_norm": 0.10782514148048612, "learning_rate": 0.00016872363506237044, "loss": 0.8489, "step": 78320 }, { "epoch": 1.3751997050510016, "grad_norm": 0.0591120204111127, "learning_rate": 0.00016871550635975455, "loss": 0.8491, "step": 78330 }, { "epoch": 1.375375269931003, "grad_norm": 0.0630334560042117, "learning_rate": 0.00016870737679917022, "loss": 0.8501, "step": 78340 }, { "epoch": 1.3755508348110044, "grad_norm": 0.04310383344508578, "learning_rate": 0.00016869924638072042, "loss": 0.8556, "step": 78350 }, { "epoch": 1.3757263996910059, "grad_norm": 0.056562212913292505, "learning_rate": 0.0001686911151045082, "loss": 0.8519, "step": 78360 }, { "epoch": 1.3759019645710073, "grad_norm": 0.04815072165149406, "learning_rate": 0.0001686829829706366, "loss": 0.8479, "step": 78370 }, { "epoch": 1.3760775294510086, "grad_norm": 0.07613192532276995, "learning_rate": 0.00016867484997920863, "loss": 0.845, "step": 78380 }, { "epoch": 1.37625309433101, "grad_norm": 0.06662045749042166, "learning_rate": 0.00016866671613032736, "loss": 0.845, "step": 78390 }, { "epoch": 1.3764286592110113, "grad_norm": 0.06787488412626587, "learning_rate": 0.0001686585814240958, "loss": 0.8422, "step": 78400 }, { "epoch": 1.3766042240910128, "grad_norm": 0.07269254431209486, "learning_rate": 0.00016865044586061704, "loss": 0.8539, "step": 78410 }, { "epoch": 1.3767797889710143, "grad_norm": 0.065960595136813, "learning_rate": 0.00016864230943999418, "loss": 0.8515, "step": 78420 }, { "epoch": 1.3769553538510158, "grad_norm": 0.053199605071870756, "learning_rate": 0.0001686341721623303, "loss": 0.8478, "step": 78430 }, { "epoch": 1.377130918731017, "grad_norm": 0.09903780380541713, "learning_rate": 0.00016862603402772848, "loss": 0.8479, "step": 78440 }, { "epoch": 1.3773064836110185, "grad_norm": 0.0964481765377408, "learning_rate": 0.00016861789503629186, "loss": 0.8427, "step": 78450 }, { "epoch": 1.3774820484910197, "grad_norm": 0.07210780299256254, "learning_rate": 0.00016860975518812355, "loss": 0.8513, "step": 78460 }, { "epoch": 1.3776576133710212, "grad_norm": 0.06887212360926338, "learning_rate": 0.00016860161448332668, "loss": 0.8608, "step": 78470 }, { "epoch": 1.3778331782510227, "grad_norm": 0.07959448982723166, "learning_rate": 0.0001685934729220044, "loss": 0.8528, "step": 78480 }, { "epoch": 1.3780087431310242, "grad_norm": 0.06151981045777631, "learning_rate": 0.00016858533050425988, "loss": 0.8566, "step": 78490 }, { "epoch": 1.3781843080110254, "grad_norm": 0.05873830592665453, "learning_rate": 0.00016857718723019627, "loss": 0.8384, "step": 78500 }, { "epoch": 1.378359872891027, "grad_norm": 0.05019969525747996, "learning_rate": 0.00016856904309991673, "loss": 0.8426, "step": 78510 }, { "epoch": 1.3785354377710282, "grad_norm": 0.05101375431376813, "learning_rate": 0.00016856089811352447, "loss": 0.8453, "step": 78520 }, { "epoch": 1.3787110026510296, "grad_norm": 0.08879430056455936, "learning_rate": 0.00016855275227112265, "loss": 0.85, "step": 78530 }, { "epoch": 1.3788865675310311, "grad_norm": 0.05383868882782366, "learning_rate": 0.00016854460557281454, "loss": 0.8468, "step": 78540 }, { "epoch": 1.3790621324110326, "grad_norm": 0.05509925725492399, "learning_rate": 0.00016853645801870337, "loss": 0.851, "step": 78550 }, { "epoch": 1.3792376972910338, "grad_norm": 0.07711127583894853, "learning_rate": 0.00016852830960889227, "loss": 0.8537, "step": 78560 }, { "epoch": 1.3794132621710353, "grad_norm": 0.06651443226645253, "learning_rate": 0.0001685201603434846, "loss": 0.8476, "step": 78570 }, { "epoch": 1.3795888270510366, "grad_norm": 0.05730325500196916, "learning_rate": 0.00016851201022258355, "loss": 0.8349, "step": 78580 }, { "epoch": 1.379764391931038, "grad_norm": 0.05116722733395741, "learning_rate": 0.00016850385924629236, "loss": 0.8481, "step": 78590 }, { "epoch": 1.3799399568110395, "grad_norm": 0.10581807727785406, "learning_rate": 0.00016849570741471432, "loss": 0.8398, "step": 78600 }, { "epoch": 1.380115521691041, "grad_norm": 0.06209771165941736, "learning_rate": 0.0001684875547279528, "loss": 0.8492, "step": 78610 }, { "epoch": 1.3802910865710423, "grad_norm": 0.09430060902109223, "learning_rate": 0.00016847940118611097, "loss": 0.8515, "step": 78620 }, { "epoch": 1.3804666514510437, "grad_norm": 0.06276210684879888, "learning_rate": 0.00016847124678929222, "loss": 0.8491, "step": 78630 }, { "epoch": 1.380642216331045, "grad_norm": 0.04489185989044163, "learning_rate": 0.00016846309153759984, "loss": 0.8469, "step": 78640 }, { "epoch": 1.3808177812110465, "grad_norm": 0.050925052807485806, "learning_rate": 0.00016845493543113716, "loss": 0.8488, "step": 78650 }, { "epoch": 1.380993346091048, "grad_norm": 0.05870777422964705, "learning_rate": 0.00016844677847000752, "loss": 0.8551, "step": 78660 }, { "epoch": 1.3811689109710494, "grad_norm": 0.0784375103814371, "learning_rate": 0.00016843862065431424, "loss": 0.8499, "step": 78670 }, { "epoch": 1.3813444758510507, "grad_norm": 0.07951408471598821, "learning_rate": 0.00016843046198416076, "loss": 0.8525, "step": 78680 }, { "epoch": 1.3815200407310522, "grad_norm": 0.08768248555788793, "learning_rate": 0.0001684223024596504, "loss": 0.8498, "step": 78690 }, { "epoch": 1.3816956056110536, "grad_norm": 0.05783587781970763, "learning_rate": 0.00016841414208088653, "loss": 0.8397, "step": 78700 }, { "epoch": 1.381871170491055, "grad_norm": 0.05034933125557133, "learning_rate": 0.00016840598084797256, "loss": 0.8532, "step": 78710 }, { "epoch": 1.3820467353710564, "grad_norm": 0.05626402911246426, "learning_rate": 0.00016839781876101188, "loss": 0.849, "step": 78720 }, { "epoch": 1.3822223002510579, "grad_norm": 0.07261129807699103, "learning_rate": 0.00016838965582010795, "loss": 0.847, "step": 78730 }, { "epoch": 1.3823978651310591, "grad_norm": 0.05552718678147833, "learning_rate": 0.00016838149202536416, "loss": 0.8464, "step": 78740 }, { "epoch": 1.3825734300110606, "grad_norm": 0.06347399527994904, "learning_rate": 0.00016837332737688394, "loss": 0.8512, "step": 78750 }, { "epoch": 1.382748994891062, "grad_norm": 0.048535972907131866, "learning_rate": 0.00016836516187477077, "loss": 0.8522, "step": 78760 }, { "epoch": 1.3829245597710633, "grad_norm": 0.06647986635039914, "learning_rate": 0.00016835699551912806, "loss": 0.8476, "step": 78770 }, { "epoch": 1.3831001246510648, "grad_norm": 0.06734490849933253, "learning_rate": 0.00016834882831005932, "loss": 0.8446, "step": 78780 }, { "epoch": 1.3832756895310663, "grad_norm": 0.06817396040382352, "learning_rate": 0.000168340660247668, "loss": 0.8457, "step": 78790 }, { "epoch": 1.3834512544110675, "grad_norm": 0.06780886205730813, "learning_rate": 0.00016833249133205763, "loss": 0.8525, "step": 78800 }, { "epoch": 1.383626819291069, "grad_norm": 0.06817856393989492, "learning_rate": 0.00016832432156333168, "loss": 0.8399, "step": 78810 }, { "epoch": 1.3838023841710705, "grad_norm": 0.06415108096199527, "learning_rate": 0.00016831615094159365, "loss": 0.8499, "step": 78820 }, { "epoch": 1.3839779490510717, "grad_norm": 0.055133587207536625, "learning_rate": 0.00016830797946694714, "loss": 0.8454, "step": 78830 }, { "epoch": 1.3841535139310732, "grad_norm": 0.06782507770795224, "learning_rate": 0.0001682998071394956, "loss": 0.8494, "step": 78840 }, { "epoch": 1.3843290788110747, "grad_norm": 0.053408358220809735, "learning_rate": 0.00016829163395934264, "loss": 0.8511, "step": 78850 }, { "epoch": 1.3845046436910762, "grad_norm": 0.07303523900365039, "learning_rate": 0.00016828345992659177, "loss": 0.843, "step": 78860 }, { "epoch": 1.3846802085710774, "grad_norm": 0.05252409863798492, "learning_rate": 0.00016827528504134652, "loss": 0.8466, "step": 78870 }, { "epoch": 1.384855773451079, "grad_norm": 0.047975256649109815, "learning_rate": 0.00016826710930371055, "loss": 0.8491, "step": 78880 }, { "epoch": 1.3850313383310802, "grad_norm": 0.05555473402600641, "learning_rate": 0.0001682589327137874, "loss": 0.8528, "step": 78890 }, { "epoch": 1.3852069032110816, "grad_norm": 0.060204000317047986, "learning_rate": 0.0001682507552716807, "loss": 0.8439, "step": 78900 }, { "epoch": 1.3853824680910831, "grad_norm": 0.09480414066769242, "learning_rate": 0.00016824257697749403, "loss": 0.8583, "step": 78910 }, { "epoch": 1.3855580329710846, "grad_norm": 0.05706154653125506, "learning_rate": 0.00016823439783133105, "loss": 0.8498, "step": 78920 }, { "epoch": 1.3857335978510859, "grad_norm": 0.049794745443520534, "learning_rate": 0.00016822621783329532, "loss": 0.8453, "step": 78930 }, { "epoch": 1.3859091627310873, "grad_norm": 0.06835132828434913, "learning_rate": 0.00016821803698349058, "loss": 0.8542, "step": 78940 }, { "epoch": 1.3860847276110886, "grad_norm": 0.06423733828825791, "learning_rate": 0.00016820985528202037, "loss": 0.8419, "step": 78950 }, { "epoch": 1.38626029249109, "grad_norm": 0.05642716228492373, "learning_rate": 0.00016820167272898844, "loss": 0.8507, "step": 78960 }, { "epoch": 1.3864358573710915, "grad_norm": 0.0571297055788433, "learning_rate": 0.00016819348932449845, "loss": 0.853, "step": 78970 }, { "epoch": 1.386611422251093, "grad_norm": 0.0711808622890846, "learning_rate": 0.00016818530506865407, "loss": 0.8419, "step": 78980 }, { "epoch": 1.3867869871310943, "grad_norm": 0.057332633229737556, "learning_rate": 0.00016817711996155898, "loss": 0.8459, "step": 78990 }, { "epoch": 1.3869625520110958, "grad_norm": 0.05670767767981809, "learning_rate": 0.00016816893400331691, "loss": 0.8589, "step": 79000 }, { "epoch": 1.387138116891097, "grad_norm": 0.0602102817251728, "learning_rate": 0.00016816074719403157, "loss": 0.8494, "step": 79010 }, { "epoch": 1.3873136817710985, "grad_norm": 0.05201930073393287, "learning_rate": 0.00016815255953380674, "loss": 0.8443, "step": 79020 }, { "epoch": 1.3874892466511, "grad_norm": 0.05091621738300832, "learning_rate": 0.00016814437102274606, "loss": 0.8437, "step": 79030 }, { "epoch": 1.3876648115311014, "grad_norm": 0.07472487847754303, "learning_rate": 0.0001681361816609533, "loss": 0.8465, "step": 79040 }, { "epoch": 1.3878403764111027, "grad_norm": 0.08201736894127766, "learning_rate": 0.0001681279914485323, "loss": 0.8453, "step": 79050 }, { "epoch": 1.3880159412911042, "grad_norm": 0.08938824555846861, "learning_rate": 0.0001681198003855868, "loss": 0.8454, "step": 79060 }, { "epoch": 1.3881915061711054, "grad_norm": 0.07611819174169084, "learning_rate": 0.00016811160847222053, "loss": 0.8492, "step": 79070 }, { "epoch": 1.388367071051107, "grad_norm": 0.056792043707831155, "learning_rate": 0.00016810341570853735, "loss": 0.8523, "step": 79080 }, { "epoch": 1.3885426359311084, "grad_norm": 0.05615429547536988, "learning_rate": 0.00016809522209464101, "loss": 0.8508, "step": 79090 }, { "epoch": 1.3887182008111099, "grad_norm": 0.06199155904748937, "learning_rate": 0.00016808702763063533, "loss": 0.8517, "step": 79100 }, { "epoch": 1.3888937656911111, "grad_norm": 0.06347151570954157, "learning_rate": 0.00016807883231662418, "loss": 0.8445, "step": 79110 }, { "epoch": 1.3890693305711126, "grad_norm": 0.07267975854471338, "learning_rate": 0.00016807063615271138, "loss": 0.8433, "step": 79120 }, { "epoch": 1.3892448954511138, "grad_norm": 0.0763009007026998, "learning_rate": 0.00016806243913900076, "loss": 0.8507, "step": 79130 }, { "epoch": 1.3894204603311153, "grad_norm": 0.05692341870416724, "learning_rate": 0.00016805424127559616, "loss": 0.8531, "step": 79140 }, { "epoch": 1.3895960252111168, "grad_norm": 0.0448652377971292, "learning_rate": 0.00016804604256260148, "loss": 0.848, "step": 79150 }, { "epoch": 1.3897715900911183, "grad_norm": 0.057473964021836285, "learning_rate": 0.0001680378430001206, "loss": 0.8453, "step": 79160 }, { "epoch": 1.3899471549711195, "grad_norm": 0.07820397258791767, "learning_rate": 0.00016802964258825738, "loss": 0.8512, "step": 79170 }, { "epoch": 1.390122719851121, "grad_norm": 0.07023807756215208, "learning_rate": 0.0001680214413271158, "loss": 0.8522, "step": 79180 }, { "epoch": 1.3902982847311223, "grad_norm": 0.05790518508918679, "learning_rate": 0.00016801323921679967, "loss": 0.8444, "step": 79190 }, { "epoch": 1.3904738496111237, "grad_norm": 0.04515198520493097, "learning_rate": 0.00016800503625741294, "loss": 0.8515, "step": 79200 }, { "epoch": 1.3906494144911252, "grad_norm": 0.04918464115044383, "learning_rate": 0.00016799683244905956, "loss": 0.8501, "step": 79210 }, { "epoch": 1.3908249793711267, "grad_norm": 0.04418892667215201, "learning_rate": 0.0001679886277918435, "loss": 0.8527, "step": 79220 }, { "epoch": 1.391000544251128, "grad_norm": 0.04702985202102055, "learning_rate": 0.0001679804222858687, "loss": 0.8438, "step": 79230 }, { "epoch": 1.3911761091311294, "grad_norm": 0.08660730790653737, "learning_rate": 0.00016797221593123907, "loss": 0.838, "step": 79240 }, { "epoch": 1.3913516740111307, "grad_norm": 0.056649686906684905, "learning_rate": 0.00016796400872805866, "loss": 0.8463, "step": 79250 }, { "epoch": 1.3915272388911322, "grad_norm": 0.06088877967753432, "learning_rate": 0.00016795580067643145, "loss": 0.8448, "step": 79260 }, { "epoch": 1.3917028037711336, "grad_norm": 0.06011206953590539, "learning_rate": 0.00016794759177646135, "loss": 0.845, "step": 79270 }, { "epoch": 1.3918783686511351, "grad_norm": 0.05800630958411314, "learning_rate": 0.00016793938202825246, "loss": 0.8529, "step": 79280 }, { "epoch": 1.3920539335311364, "grad_norm": 0.08527961574147586, "learning_rate": 0.00016793117143190878, "loss": 0.848, "step": 79290 }, { "epoch": 1.3922294984111379, "grad_norm": 0.05845465183381883, "learning_rate": 0.00016792295998753433, "loss": 0.8519, "step": 79300 }, { "epoch": 1.3924050632911391, "grad_norm": 0.05410498877602398, "learning_rate": 0.00016791474769523314, "loss": 0.8512, "step": 79310 }, { "epoch": 1.3925806281711406, "grad_norm": 0.047412129093375895, "learning_rate": 0.0001679065345551093, "loss": 0.8522, "step": 79320 }, { "epoch": 1.392756193051142, "grad_norm": 0.06430566949215867, "learning_rate": 0.0001678983205672668, "loss": 0.834, "step": 79330 }, { "epoch": 1.3929317579311435, "grad_norm": 0.04293639514967941, "learning_rate": 0.00016789010573180978, "loss": 0.8481, "step": 79340 }, { "epoch": 1.3931073228111448, "grad_norm": 0.08284641414215144, "learning_rate": 0.0001678818900488423, "loss": 0.8414, "step": 79350 }, { "epoch": 1.3932828876911463, "grad_norm": 0.05781833092937468, "learning_rate": 0.00016787367351846845, "loss": 0.8531, "step": 79360 }, { "epoch": 1.3934584525711475, "grad_norm": 0.053493605929655096, "learning_rate": 0.00016786545614079235, "loss": 0.8408, "step": 79370 }, { "epoch": 1.393634017451149, "grad_norm": 0.05499692847789034, "learning_rate": 0.0001678572379159181, "loss": 0.8512, "step": 79380 }, { "epoch": 1.3938095823311505, "grad_norm": 0.052279235834738966, "learning_rate": 0.00016784901884394984, "loss": 0.8556, "step": 79390 }, { "epoch": 1.393985147211152, "grad_norm": 0.07096700037632646, "learning_rate": 0.0001678407989249917, "loss": 0.8454, "step": 79400 }, { "epoch": 1.3941607120911532, "grad_norm": 0.060188456721607865, "learning_rate": 0.0001678325781591478, "loss": 0.8462, "step": 79410 }, { "epoch": 1.3943362769711547, "grad_norm": 0.07786647584791796, "learning_rate": 0.00016782435654652235, "loss": 0.8578, "step": 79420 }, { "epoch": 1.3945118418511562, "grad_norm": 0.05805863295628635, "learning_rate": 0.0001678161340872195, "loss": 0.844, "step": 79430 }, { "epoch": 1.3946874067311574, "grad_norm": 0.08469596498162564, "learning_rate": 0.00016780791078134343, "loss": 0.8585, "step": 79440 }, { "epoch": 1.394862971611159, "grad_norm": 0.06616028960039877, "learning_rate": 0.0001677996866289983, "loss": 0.8506, "step": 79450 }, { "epoch": 1.3950385364911604, "grad_norm": 0.07286813177656401, "learning_rate": 0.0001677914616302884, "loss": 0.8489, "step": 79460 }, { "epoch": 1.3952141013711616, "grad_norm": 0.05687366805607219, "learning_rate": 0.0001677832357853178, "loss": 0.8467, "step": 79470 }, { "epoch": 1.3953896662511631, "grad_norm": 0.05323284452853973, "learning_rate": 0.00016777500909419084, "loss": 0.8538, "step": 79480 }, { "epoch": 1.3955652311311646, "grad_norm": 0.0443920152877011, "learning_rate": 0.00016776678155701173, "loss": 0.8438, "step": 79490 }, { "epoch": 1.3957407960111659, "grad_norm": 0.07235982419387883, "learning_rate": 0.00016775855317388466, "loss": 0.8501, "step": 79500 }, { "epoch": 1.3959163608911673, "grad_norm": 0.07115561093165047, "learning_rate": 0.00016775032394491396, "loss": 0.8505, "step": 79510 }, { "epoch": 1.3960919257711688, "grad_norm": 0.0620121954572445, "learning_rate": 0.00016774209387020387, "loss": 0.8493, "step": 79520 }, { "epoch": 1.39626749065117, "grad_norm": 0.08450451470492404, "learning_rate": 0.00016773386294985867, "loss": 0.86, "step": 79530 }, { "epoch": 1.3964430555311715, "grad_norm": 0.05924780078822191, "learning_rate": 0.0001677256311839826, "loss": 0.8532, "step": 79540 }, { "epoch": 1.396618620411173, "grad_norm": 0.04885497982240299, "learning_rate": 0.00016771739857268, "loss": 0.852, "step": 79550 }, { "epoch": 1.3967941852911743, "grad_norm": 0.0681660737509734, "learning_rate": 0.00016770916511605518, "loss": 0.8498, "step": 79560 }, { "epoch": 1.3969697501711758, "grad_norm": 0.058692000075455227, "learning_rate": 0.00016770093081421247, "loss": 0.85, "step": 79570 }, { "epoch": 1.3971453150511772, "grad_norm": 0.05988779590226452, "learning_rate": 0.00016769269566725616, "loss": 0.8502, "step": 79580 }, { "epoch": 1.3973208799311787, "grad_norm": 0.06154904200556931, "learning_rate": 0.00016768445967529062, "loss": 0.8551, "step": 79590 }, { "epoch": 1.39749644481118, "grad_norm": 0.048274612357811435, "learning_rate": 0.00016767622283842023, "loss": 0.8434, "step": 79600 }, { "epoch": 1.3976720096911814, "grad_norm": 0.05840426567030655, "learning_rate": 0.0001676679851567493, "loss": 0.8482, "step": 79610 }, { "epoch": 1.3978475745711827, "grad_norm": 0.060119999238901864, "learning_rate": 0.0001676597466303822, "loss": 0.8455, "step": 79620 }, { "epoch": 1.3980231394511842, "grad_norm": 0.05604172794776091, "learning_rate": 0.00016765150725942335, "loss": 0.8503, "step": 79630 }, { "epoch": 1.3981987043311856, "grad_norm": 0.07233379678350892, "learning_rate": 0.0001676432670439771, "loss": 0.8432, "step": 79640 }, { "epoch": 1.3983742692111871, "grad_norm": 0.05574474654796039, "learning_rate": 0.0001676350259841479, "loss": 0.8504, "step": 79650 }, { "epoch": 1.3985498340911884, "grad_norm": 0.06798980652102544, "learning_rate": 0.0001676267840800402, "loss": 0.8466, "step": 79660 }, { "epoch": 1.3987253989711899, "grad_norm": 0.06351787894438427, "learning_rate": 0.0001676185413317583, "loss": 0.8441, "step": 79670 }, { "epoch": 1.3989009638511911, "grad_norm": 0.05562847013160359, "learning_rate": 0.00016761029773940675, "loss": 0.8542, "step": 79680 }, { "epoch": 1.3990765287311926, "grad_norm": 0.06826896947532406, "learning_rate": 0.00016760205330308995, "loss": 0.843, "step": 79690 }, { "epoch": 1.399252093611194, "grad_norm": 0.0499224413680815, "learning_rate": 0.0001675938080229124, "loss": 0.8541, "step": 79700 }, { "epoch": 1.3994276584911955, "grad_norm": 0.06070634697106178, "learning_rate": 0.00016758556189897853, "loss": 0.8559, "step": 79710 }, { "epoch": 1.3996032233711968, "grad_norm": 0.06482726683235214, "learning_rate": 0.0001675773149313928, "loss": 0.8473, "step": 79720 }, { "epoch": 1.3997787882511983, "grad_norm": 0.06776975545604932, "learning_rate": 0.00016756906712025977, "loss": 0.8452, "step": 79730 }, { "epoch": 1.3999543531311995, "grad_norm": 0.06299876291278901, "learning_rate": 0.00016756081846568387, "loss": 0.8427, "step": 79740 }, { "epoch": 1.400129918011201, "grad_norm": 0.05141373341853048, "learning_rate": 0.00016755256896776967, "loss": 0.8441, "step": 79750 }, { "epoch": 1.4003054828912025, "grad_norm": 0.06792807011831067, "learning_rate": 0.00016754431862662166, "loss": 0.8472, "step": 79760 }, { "epoch": 1.400481047771204, "grad_norm": 0.08005923894112484, "learning_rate": 0.00016753606744234438, "loss": 0.8468, "step": 79770 }, { "epoch": 1.4006566126512052, "grad_norm": 0.060029051320345306, "learning_rate": 0.00016752781541504237, "loss": 0.8414, "step": 79780 }, { "epoch": 1.4008321775312067, "grad_norm": 0.04883403367973953, "learning_rate": 0.00016751956254482014, "loss": 0.8507, "step": 79790 }, { "epoch": 1.401007742411208, "grad_norm": 0.0583673545144961, "learning_rate": 0.00016751130883178238, "loss": 0.8452, "step": 79800 }, { "epoch": 1.4011833072912094, "grad_norm": 0.07885467103676112, "learning_rate": 0.00016750305427603354, "loss": 0.8484, "step": 79810 }, { "epoch": 1.401358872171211, "grad_norm": 0.06455164855737235, "learning_rate": 0.0001674947988776783, "loss": 0.851, "step": 79820 }, { "epoch": 1.4015344370512124, "grad_norm": 0.05687922599526649, "learning_rate": 0.0001674865426368212, "loss": 0.8458, "step": 79830 }, { "epoch": 1.4017100019312136, "grad_norm": 0.05590273517928278, "learning_rate": 0.00016747828555356683, "loss": 0.8412, "step": 79840 }, { "epoch": 1.4018855668112151, "grad_norm": 0.06222962085003173, "learning_rate": 0.0001674700276280199, "loss": 0.8508, "step": 79850 }, { "epoch": 1.4020611316912164, "grad_norm": 0.09571362319427813, "learning_rate": 0.00016746176886028492, "loss": 0.8494, "step": 79860 }, { "epoch": 1.4022366965712179, "grad_norm": 0.0666946793427891, "learning_rate": 0.00016745350925046664, "loss": 0.8561, "step": 79870 }, { "epoch": 1.4024122614512193, "grad_norm": 0.06079641401660231, "learning_rate": 0.00016744524879866963, "loss": 0.8454, "step": 79880 }, { "epoch": 1.4025878263312208, "grad_norm": 0.052625939384957854, "learning_rate": 0.0001674369875049986, "loss": 0.8529, "step": 79890 }, { "epoch": 1.402763391211222, "grad_norm": 0.06265815583750872, "learning_rate": 0.00016742872536955818, "loss": 0.8554, "step": 79900 }, { "epoch": 1.4029389560912235, "grad_norm": 0.08353979269445572, "learning_rate": 0.0001674204623924531, "loss": 0.8531, "step": 79910 }, { "epoch": 1.4031145209712248, "grad_norm": 0.05034377679050616, "learning_rate": 0.00016741219857378806, "loss": 0.8517, "step": 79920 }, { "epoch": 1.4032900858512263, "grad_norm": 0.053511877541355696, "learning_rate": 0.00016740393391366772, "loss": 0.8484, "step": 79930 }, { "epoch": 1.4034656507312278, "grad_norm": 0.06506930898824914, "learning_rate": 0.00016739566841219677, "loss": 0.8537, "step": 79940 }, { "epoch": 1.4036412156112292, "grad_norm": 0.07447349163683194, "learning_rate": 0.00016738740206948, "loss": 0.8471, "step": 79950 }, { "epoch": 1.4038167804912305, "grad_norm": 0.07901490792668296, "learning_rate": 0.00016737913488562215, "loss": 0.852, "step": 79960 }, { "epoch": 1.403992345371232, "grad_norm": 0.06084866498166804, "learning_rate": 0.0001673708668607279, "loss": 0.8413, "step": 79970 }, { "epoch": 1.4041679102512332, "grad_norm": 0.048140856116035476, "learning_rate": 0.00016736259799490202, "loss": 0.8491, "step": 79980 }, { "epoch": 1.4043434751312347, "grad_norm": 0.04329567645123289, "learning_rate": 0.00016735432828824938, "loss": 0.8485, "step": 79990 }, { "epoch": 1.4045190400112362, "grad_norm": 0.07200779262494765, "learning_rate": 0.00016734605774087466, "loss": 0.8555, "step": 80000 }, { "epoch": 1.4046946048912377, "grad_norm": 0.05315096174957423, "learning_rate": 0.00016733778635288267, "loss": 0.8486, "step": 80010 }, { "epoch": 1.404870169771239, "grad_norm": 0.047884821586913665, "learning_rate": 0.0001673295141243782, "loss": 0.8521, "step": 80020 }, { "epoch": 1.4050457346512404, "grad_norm": 0.04808177917761078, "learning_rate": 0.00016732124105546607, "loss": 0.8456, "step": 80030 }, { "epoch": 1.4052212995312416, "grad_norm": 0.052769744127138855, "learning_rate": 0.00016731296714625113, "loss": 0.8515, "step": 80040 }, { "epoch": 1.4053968644112431, "grad_norm": 0.07892395478675614, "learning_rate": 0.0001673046923968382, "loss": 0.8518, "step": 80050 }, { "epoch": 1.4055724292912446, "grad_norm": 0.06924384266543553, "learning_rate": 0.00016729641680733208, "loss": 0.852, "step": 80060 }, { "epoch": 1.405747994171246, "grad_norm": 0.07000768263983016, "learning_rate": 0.00016728814037783764, "loss": 0.8518, "step": 80070 }, { "epoch": 1.4059235590512473, "grad_norm": 0.06736220886048667, "learning_rate": 0.00016727986310845978, "loss": 0.8423, "step": 80080 }, { "epoch": 1.4060991239312488, "grad_norm": 0.06820437390111074, "learning_rate": 0.00016727158499930334, "loss": 0.8461, "step": 80090 }, { "epoch": 1.40627468881125, "grad_norm": 0.05043150609854019, "learning_rate": 0.00016726330605047325, "loss": 0.8488, "step": 80100 }, { "epoch": 1.4064502536912515, "grad_norm": 0.06001908911003726, "learning_rate": 0.00016725502626207435, "loss": 0.8457, "step": 80110 }, { "epoch": 1.406625818571253, "grad_norm": 0.05811075227051948, "learning_rate": 0.00016724674563421154, "loss": 0.8532, "step": 80120 }, { "epoch": 1.4068013834512545, "grad_norm": 0.06108370094350564, "learning_rate": 0.00016723846416698984, "loss": 0.8511, "step": 80130 }, { "epoch": 1.4069769483312558, "grad_norm": 0.055129669973650486, "learning_rate": 0.00016723018186051402, "loss": 0.8455, "step": 80140 }, { "epoch": 1.4071525132112572, "grad_norm": 0.10033422290674208, "learning_rate": 0.00016722189871488916, "loss": 0.8489, "step": 80150 }, { "epoch": 1.4073280780912587, "grad_norm": 0.045254052155617445, "learning_rate": 0.00016721361473022014, "loss": 0.8468, "step": 80160 }, { "epoch": 1.40750364297126, "grad_norm": 0.06505215589383978, "learning_rate": 0.0001672053299066119, "loss": 0.8449, "step": 80170 }, { "epoch": 1.4076792078512614, "grad_norm": 0.052360306240289506, "learning_rate": 0.00016719704424416948, "loss": 0.8479, "step": 80180 }, { "epoch": 1.407854772731263, "grad_norm": 0.045069864589437104, "learning_rate": 0.0001671887577429978, "loss": 0.8509, "step": 80190 }, { "epoch": 1.4080303376112642, "grad_norm": 0.061195897270228186, "learning_rate": 0.0001671804704032019, "loss": 0.8496, "step": 80200 }, { "epoch": 1.4082059024912656, "grad_norm": 0.06136509338373765, "learning_rate": 0.00016717218222488673, "loss": 0.852, "step": 80210 }, { "epoch": 1.4083814673712671, "grad_norm": 0.07817534219840108, "learning_rate": 0.00016716389320815732, "loss": 0.8523, "step": 80220 }, { "epoch": 1.4085570322512684, "grad_norm": 0.049996458606025404, "learning_rate": 0.00016715560335311876, "loss": 0.8533, "step": 80230 }, { "epoch": 1.4087325971312699, "grad_norm": 0.06790224884776236, "learning_rate": 0.00016714731265987597, "loss": 0.8475, "step": 80240 }, { "epoch": 1.4089081620112713, "grad_norm": 0.06206258548093151, "learning_rate": 0.00016713902112853408, "loss": 0.8462, "step": 80250 }, { "epoch": 1.4090837268912726, "grad_norm": 0.07243583759455179, "learning_rate": 0.0001671307287591981, "loss": 0.8477, "step": 80260 }, { "epoch": 1.409259291771274, "grad_norm": 0.05807279551745136, "learning_rate": 0.00016712243555197314, "loss": 0.8493, "step": 80270 }, { "epoch": 1.4094348566512755, "grad_norm": 0.05773946869174366, "learning_rate": 0.00016711414150696424, "loss": 0.8501, "step": 80280 }, { "epoch": 1.4096104215312768, "grad_norm": 0.07008765154988973, "learning_rate": 0.00016710584662427646, "loss": 0.8488, "step": 80290 }, { "epoch": 1.4097859864112783, "grad_norm": 0.05332802770332509, "learning_rate": 0.00016709755090401498, "loss": 0.8539, "step": 80300 }, { "epoch": 1.4099615512912798, "grad_norm": 0.06237016481335205, "learning_rate": 0.00016708925434628485, "loss": 0.8467, "step": 80310 }, { "epoch": 1.4101371161712812, "grad_norm": 0.04611994129465062, "learning_rate": 0.00016708095695119117, "loss": 0.8464, "step": 80320 }, { "epoch": 1.4103126810512825, "grad_norm": 0.043454836907558504, "learning_rate": 0.00016707265871883912, "loss": 0.8462, "step": 80330 }, { "epoch": 1.410488245931284, "grad_norm": 0.07143960305866896, "learning_rate": 0.00016706435964933386, "loss": 0.8475, "step": 80340 }, { "epoch": 1.4106638108112852, "grad_norm": 0.06157879204717294, "learning_rate": 0.00016705605974278045, "loss": 0.8466, "step": 80350 }, { "epoch": 1.4108393756912867, "grad_norm": 0.06683636727926681, "learning_rate": 0.00016704775899928413, "loss": 0.8479, "step": 80360 }, { "epoch": 1.4110149405712882, "grad_norm": 0.05986512598024288, "learning_rate": 0.00016703945741895003, "loss": 0.8409, "step": 80370 }, { "epoch": 1.4111905054512897, "grad_norm": 0.06726621301965512, "learning_rate": 0.00016703115500188337, "loss": 0.8475, "step": 80380 }, { "epoch": 1.411366070331291, "grad_norm": 0.04549994900267229, "learning_rate": 0.0001670228517481893, "loss": 0.8543, "step": 80390 }, { "epoch": 1.4115416352112924, "grad_norm": 0.06584618799734351, "learning_rate": 0.00016701454765797306, "loss": 0.8425, "step": 80400 }, { "epoch": 1.4117172000912936, "grad_norm": 0.06301732972848835, "learning_rate": 0.00016700624273133984, "loss": 0.8452, "step": 80410 }, { "epoch": 1.4118927649712951, "grad_norm": 0.063154019401425, "learning_rate": 0.00016699793696839485, "loss": 0.8514, "step": 80420 }, { "epoch": 1.4120683298512966, "grad_norm": 0.058541584384326155, "learning_rate": 0.00016698963036924339, "loss": 0.8486, "step": 80430 }, { "epoch": 1.412243894731298, "grad_norm": 0.0672825827331428, "learning_rate": 0.00016698132293399066, "loss": 0.8515, "step": 80440 }, { "epoch": 1.4124194596112993, "grad_norm": 0.06656440865103634, "learning_rate": 0.00016697301466274194, "loss": 0.85, "step": 80450 }, { "epoch": 1.4125950244913008, "grad_norm": 0.06086301546677466, "learning_rate": 0.0001669647055556024, "loss": 0.8514, "step": 80460 }, { "epoch": 1.412770589371302, "grad_norm": 0.09377049866750063, "learning_rate": 0.0001669563956126775, "loss": 0.8586, "step": 80470 }, { "epoch": 1.4129461542513035, "grad_norm": 0.052940420111098105, "learning_rate": 0.00016694808483407237, "loss": 0.8465, "step": 80480 }, { "epoch": 1.413121719131305, "grad_norm": 0.06721437119269852, "learning_rate": 0.0001669397732198924, "loss": 0.8509, "step": 80490 }, { "epoch": 1.4132972840113065, "grad_norm": 0.053955980155329465, "learning_rate": 0.00016693146077024283, "loss": 0.8467, "step": 80500 }, { "epoch": 1.4134728488913078, "grad_norm": 0.05140887591316693, "learning_rate": 0.00016692314748522903, "loss": 0.852, "step": 80510 }, { "epoch": 1.4136484137713092, "grad_norm": 0.07117886362256355, "learning_rate": 0.00016691483336495636, "loss": 0.854, "step": 80520 }, { "epoch": 1.4138239786513105, "grad_norm": 0.07141302732152804, "learning_rate": 0.00016690651840953004, "loss": 0.8476, "step": 80530 }, { "epoch": 1.413999543531312, "grad_norm": 0.05327403536966666, "learning_rate": 0.00016689820261905553, "loss": 0.8542, "step": 80540 }, { "epoch": 1.4141751084113134, "grad_norm": 0.08608376675795494, "learning_rate": 0.00016688988599363816, "loss": 0.8414, "step": 80550 }, { "epoch": 1.414350673291315, "grad_norm": 0.06770721603177154, "learning_rate": 0.00016688156853338334, "loss": 0.8548, "step": 80560 }, { "epoch": 1.4145262381713162, "grad_norm": 0.04831665315232489, "learning_rate": 0.00016687325023839638, "loss": 0.8499, "step": 80570 }, { "epoch": 1.4147018030513177, "grad_norm": 0.04993581057461329, "learning_rate": 0.00016686493110878276, "loss": 0.8519, "step": 80580 }, { "epoch": 1.414877367931319, "grad_norm": 0.04811251431682981, "learning_rate": 0.00016685661114464778, "loss": 0.8473, "step": 80590 }, { "epoch": 1.4150529328113204, "grad_norm": 0.07105266551668855, "learning_rate": 0.000166848290346097, "loss": 0.8512, "step": 80600 }, { "epoch": 1.4152284976913219, "grad_norm": 0.05435750663485612, "learning_rate": 0.0001668399687132357, "loss": 0.8494, "step": 80610 }, { "epoch": 1.4154040625713233, "grad_norm": 0.05739231996224823, "learning_rate": 0.0001668316462461694, "loss": 0.8523, "step": 80620 }, { "epoch": 1.4155796274513246, "grad_norm": 0.0632155244916602, "learning_rate": 0.0001668233229450035, "loss": 0.8482, "step": 80630 }, { "epoch": 1.415755192331326, "grad_norm": 0.06876150753964218, "learning_rate": 0.0001668149988098435, "loss": 0.8509, "step": 80640 }, { "epoch": 1.4159307572113273, "grad_norm": 0.0715239860968698, "learning_rate": 0.00016680667384079485, "loss": 0.8524, "step": 80650 }, { "epoch": 1.4161063220913288, "grad_norm": 0.07304957695481351, "learning_rate": 0.00016679834803796302, "loss": 0.8474, "step": 80660 }, { "epoch": 1.4162818869713303, "grad_norm": 0.055250242734909825, "learning_rate": 0.00016679002140145352, "loss": 0.8483, "step": 80670 }, { "epoch": 1.4164574518513318, "grad_norm": 0.06151511790253977, "learning_rate": 0.00016678169393137185, "loss": 0.8481, "step": 80680 }, { "epoch": 1.416633016731333, "grad_norm": 0.08563207875738589, "learning_rate": 0.00016677336562782348, "loss": 0.8445, "step": 80690 }, { "epoch": 1.4168085816113345, "grad_norm": 0.11257590688993666, "learning_rate": 0.00016676503649091403, "loss": 0.8494, "step": 80700 }, { "epoch": 1.4169841464913357, "grad_norm": 0.07547966629663137, "learning_rate": 0.0001667567065207489, "loss": 0.8533, "step": 80710 }, { "epoch": 1.4171597113713372, "grad_norm": 0.0771635021143166, "learning_rate": 0.0001667483757174337, "loss": 0.8474, "step": 80720 }, { "epoch": 1.4173352762513387, "grad_norm": 0.05811437035027275, "learning_rate": 0.000166740044081074, "loss": 0.844, "step": 80730 }, { "epoch": 1.4175108411313402, "grad_norm": 0.050026539368022156, "learning_rate": 0.00016673171161177535, "loss": 0.842, "step": 80740 }, { "epoch": 1.4176864060113414, "grad_norm": 0.05374637996156687, "learning_rate": 0.0001667233783096433, "loss": 0.849, "step": 80750 }, { "epoch": 1.417861970891343, "grad_norm": 0.0708850722045445, "learning_rate": 0.00016671504417478347, "loss": 0.8501, "step": 80760 }, { "epoch": 1.4180375357713442, "grad_norm": 0.06733848193230633, "learning_rate": 0.0001667067092073014, "loss": 0.8493, "step": 80770 }, { "epoch": 1.4182131006513456, "grad_norm": 0.05788749315730294, "learning_rate": 0.00016669837340730275, "loss": 0.8515, "step": 80780 }, { "epoch": 1.4183886655313471, "grad_norm": 0.053075988905549425, "learning_rate": 0.0001666900367748931, "loss": 0.8477, "step": 80790 }, { "epoch": 1.4185642304113486, "grad_norm": 0.0663407373452303, "learning_rate": 0.0001666816993101781, "loss": 0.8526, "step": 80800 }, { "epoch": 1.4187397952913499, "grad_norm": 0.05510470585519964, "learning_rate": 0.0001666733610132634, "loss": 0.8583, "step": 80810 }, { "epoch": 1.4189153601713513, "grad_norm": 0.06724430881653688, "learning_rate": 0.00016666502188425464, "loss": 0.856, "step": 80820 }, { "epoch": 1.4190909250513526, "grad_norm": 0.06843771075307524, "learning_rate": 0.00016665668192325744, "loss": 0.8488, "step": 80830 }, { "epoch": 1.419266489931354, "grad_norm": 0.07061594249580785, "learning_rate": 0.00016664834113037753, "loss": 0.8469, "step": 80840 }, { "epoch": 1.4194420548113555, "grad_norm": 0.05690046946858986, "learning_rate": 0.00016663999950572051, "loss": 0.8503, "step": 80850 }, { "epoch": 1.419617619691357, "grad_norm": 0.0715613504113208, "learning_rate": 0.00016663165704939215, "loss": 0.8509, "step": 80860 }, { "epoch": 1.4197931845713583, "grad_norm": 0.06258772483823959, "learning_rate": 0.00016662331376149814, "loss": 0.8517, "step": 80870 }, { "epoch": 1.4199687494513598, "grad_norm": 0.06653892892486479, "learning_rate": 0.00016661496964214413, "loss": 0.8458, "step": 80880 }, { "epoch": 1.4201443143313612, "grad_norm": 0.07056399848911848, "learning_rate": 0.00016660662469143587, "loss": 0.8496, "step": 80890 }, { "epoch": 1.4203198792113625, "grad_norm": 0.05267893447498468, "learning_rate": 0.00016659827890947912, "loss": 0.8473, "step": 80900 }, { "epoch": 1.420495444091364, "grad_norm": 0.0922316067623594, "learning_rate": 0.00016658993229637964, "loss": 0.8387, "step": 80910 }, { "epoch": 1.4206710089713654, "grad_norm": 0.06444592424418351, "learning_rate": 0.00016658158485224306, "loss": 0.8522, "step": 80920 }, { "epoch": 1.4208465738513667, "grad_norm": 0.05676799245717015, "learning_rate": 0.0001665732365771753, "loss": 0.8457, "step": 80930 }, { "epoch": 1.4210221387313682, "grad_norm": 0.050412879734346175, "learning_rate": 0.00016656488747128208, "loss": 0.8535, "step": 80940 }, { "epoch": 1.4211977036113697, "grad_norm": 0.046524226771478217, "learning_rate": 0.00016655653753466912, "loss": 0.843, "step": 80950 }, { "epoch": 1.421373268491371, "grad_norm": 0.07127400184191716, "learning_rate": 0.00016654818676744226, "loss": 0.8492, "step": 80960 }, { "epoch": 1.4215488333713724, "grad_norm": 0.0629920590720666, "learning_rate": 0.00016653983516970734, "loss": 0.8565, "step": 80970 }, { "epoch": 1.4217243982513739, "grad_norm": 0.06498636695386655, "learning_rate": 0.00016653148274157014, "loss": 0.8523, "step": 80980 }, { "epoch": 1.4218999631313751, "grad_norm": 0.0882525169240322, "learning_rate": 0.0001665231294831365, "loss": 0.8537, "step": 80990 }, { "epoch": 1.4220755280113766, "grad_norm": 0.053451584159286775, "learning_rate": 0.00016651477539451223, "loss": 0.8476, "step": 81000 }, { "epoch": 1.422251092891378, "grad_norm": 0.0575202652742174, "learning_rate": 0.00016650642047580323, "loss": 0.8575, "step": 81010 }, { "epoch": 1.4224266577713793, "grad_norm": 0.039878215272903306, "learning_rate": 0.0001664980647271153, "loss": 0.8599, "step": 81020 }, { "epoch": 1.4226022226513808, "grad_norm": 0.061318468466322634, "learning_rate": 0.0001664897081485544, "loss": 0.8509, "step": 81030 }, { "epoch": 1.4227777875313823, "grad_norm": 0.061726354119980544, "learning_rate": 0.00016648135074022629, "loss": 0.8467, "step": 81040 }, { "epoch": 1.4229533524113838, "grad_norm": 0.06559577890286558, "learning_rate": 0.00016647299250223692, "loss": 0.851, "step": 81050 }, { "epoch": 1.423128917291385, "grad_norm": 0.05893476402516462, "learning_rate": 0.00016646463343469222, "loss": 0.8495, "step": 81060 }, { "epoch": 1.4233044821713865, "grad_norm": 0.05391029353417367, "learning_rate": 0.00016645627353769802, "loss": 0.8467, "step": 81070 }, { "epoch": 1.4234800470513878, "grad_norm": 0.06387303672039503, "learning_rate": 0.0001664479128113603, "loss": 0.8533, "step": 81080 }, { "epoch": 1.4236556119313892, "grad_norm": 0.061969887956094656, "learning_rate": 0.00016643955125578502, "loss": 0.8508, "step": 81090 }, { "epoch": 1.4238311768113907, "grad_norm": 0.04680135502872836, "learning_rate": 0.00016643118887107804, "loss": 0.8519, "step": 81100 }, { "epoch": 1.4240067416913922, "grad_norm": 0.05429387205143117, "learning_rate": 0.00016642282565734536, "loss": 0.8513, "step": 81110 }, { "epoch": 1.4241823065713934, "grad_norm": 0.0529076558652055, "learning_rate": 0.000166414461614693, "loss": 0.8474, "step": 81120 }, { "epoch": 1.424357871451395, "grad_norm": 0.061439474769756044, "learning_rate": 0.0001664060967432268, "loss": 0.8468, "step": 81130 }, { "epoch": 1.4245334363313962, "grad_norm": 0.06226914967962695, "learning_rate": 0.00016639773104305285, "loss": 0.8498, "step": 81140 }, { "epoch": 1.4247090012113977, "grad_norm": 0.04020420233785825, "learning_rate": 0.00016638936451427713, "loss": 0.8544, "step": 81150 }, { "epoch": 1.4248845660913991, "grad_norm": 0.04986726675399071, "learning_rate": 0.00016638099715700564, "loss": 0.8546, "step": 81160 }, { "epoch": 1.4250601309714006, "grad_norm": 0.04357940597462885, "learning_rate": 0.00016637262897134433, "loss": 0.8494, "step": 81170 }, { "epoch": 1.4252356958514019, "grad_norm": 0.055826780929132534, "learning_rate": 0.00016636425995739934, "loss": 0.8507, "step": 81180 }, { "epoch": 1.4254112607314033, "grad_norm": 0.06555626165159685, "learning_rate": 0.0001663558901152766, "loss": 0.8539, "step": 81190 }, { "epoch": 1.4255868256114046, "grad_norm": 0.06535223110610641, "learning_rate": 0.00016634751944508223, "loss": 0.8472, "step": 81200 }, { "epoch": 1.425762390491406, "grad_norm": 0.051787666827150726, "learning_rate": 0.00016633914794692226, "loss": 0.8466, "step": 81210 }, { "epoch": 1.4259379553714076, "grad_norm": 0.06212584807396637, "learning_rate": 0.00016633077562090275, "loss": 0.853, "step": 81220 }, { "epoch": 1.426113520251409, "grad_norm": 0.05084729890019404, "learning_rate": 0.00016632240246712985, "loss": 0.8491, "step": 81230 }, { "epoch": 1.4262890851314103, "grad_norm": 0.06273268135393979, "learning_rate": 0.00016631402848570954, "loss": 0.8516, "step": 81240 }, { "epoch": 1.4264646500114118, "grad_norm": 0.06267495906146463, "learning_rate": 0.00016630565367674799, "loss": 0.8584, "step": 81250 }, { "epoch": 1.426640214891413, "grad_norm": 0.057761233457685385, "learning_rate": 0.00016629727804035128, "loss": 0.8503, "step": 81260 }, { "epoch": 1.4268157797714145, "grad_norm": 0.059981895558747785, "learning_rate": 0.00016628890157662556, "loss": 0.85, "step": 81270 }, { "epoch": 1.426991344651416, "grad_norm": 0.05558639966814963, "learning_rate": 0.00016628052428567693, "loss": 0.8526, "step": 81280 }, { "epoch": 1.4271669095314174, "grad_norm": 0.05811373842918574, "learning_rate": 0.00016627214616761154, "loss": 0.8478, "step": 81290 }, { "epoch": 1.4273424744114187, "grad_norm": 0.0719687935629689, "learning_rate": 0.00016626376722253555, "loss": 0.8439, "step": 81300 }, { "epoch": 1.4275180392914202, "grad_norm": 0.058390661752836595, "learning_rate": 0.00016625538745055512, "loss": 0.8507, "step": 81310 }, { "epoch": 1.4276936041714214, "grad_norm": 0.0555816186649208, "learning_rate": 0.00016624700685177645, "loss": 0.8501, "step": 81320 }, { "epoch": 1.427869169051423, "grad_norm": 0.0674769552386713, "learning_rate": 0.0001662386254263057, "loss": 0.8503, "step": 81330 }, { "epoch": 1.4280447339314244, "grad_norm": 0.06113339159514674, "learning_rate": 0.00016623024317424903, "loss": 0.8529, "step": 81340 }, { "epoch": 1.4282202988114259, "grad_norm": 0.060408720216776186, "learning_rate": 0.00016622186009571268, "loss": 0.8517, "step": 81350 }, { "epoch": 1.4283958636914271, "grad_norm": 0.061103079774586455, "learning_rate": 0.00016621347619080287, "loss": 0.8488, "step": 81360 }, { "epoch": 1.4285714285714286, "grad_norm": 0.0620235064494806, "learning_rate": 0.00016620509145962584, "loss": 0.8467, "step": 81370 }, { "epoch": 1.4287469934514299, "grad_norm": 0.06323893700047466, "learning_rate": 0.0001661967059022878, "loss": 0.8508, "step": 81380 }, { "epoch": 1.4289225583314313, "grad_norm": 0.05795277704850642, "learning_rate": 0.000166188319518895, "loss": 0.8478, "step": 81390 }, { "epoch": 1.4290981232114328, "grad_norm": 0.08837848881221459, "learning_rate": 0.0001661799323095537, "loss": 0.8515, "step": 81400 }, { "epoch": 1.4292736880914343, "grad_norm": 0.06375950244576993, "learning_rate": 0.00016617154427437013, "loss": 0.8473, "step": 81410 }, { "epoch": 1.4294492529714355, "grad_norm": 0.06698571555624108, "learning_rate": 0.00016616315541345065, "loss": 0.8428, "step": 81420 }, { "epoch": 1.429624817851437, "grad_norm": 0.05157190999690618, "learning_rate": 0.00016615476572690146, "loss": 0.8438, "step": 81430 }, { "epoch": 1.4298003827314383, "grad_norm": 0.03986045202689041, "learning_rate": 0.0001661463752148289, "loss": 0.8477, "step": 81440 }, { "epoch": 1.4299759476114398, "grad_norm": 0.05881115207930584, "learning_rate": 0.0001661379838773393, "loss": 0.8532, "step": 81450 }, { "epoch": 1.4301515124914412, "grad_norm": 0.07434109866335734, "learning_rate": 0.000166129591714539, "loss": 0.8434, "step": 81460 }, { "epoch": 1.4303270773714427, "grad_norm": 0.05399343587026232, "learning_rate": 0.00016612119872653426, "loss": 0.8528, "step": 81470 }, { "epoch": 1.430502642251444, "grad_norm": 0.07188516252819399, "learning_rate": 0.00016611280491343142, "loss": 0.8463, "step": 81480 }, { "epoch": 1.4306782071314454, "grad_norm": 0.06324962723208821, "learning_rate": 0.0001661044102753369, "loss": 0.8529, "step": 81490 }, { "epoch": 1.4308537720114467, "grad_norm": 0.09136438715595296, "learning_rate": 0.000166096014812357, "loss": 0.8486, "step": 81500 }, { "epoch": 1.4310293368914482, "grad_norm": 0.09771243497291682, "learning_rate": 0.00016608761852459814, "loss": 0.8398, "step": 81510 }, { "epoch": 1.4312049017714497, "grad_norm": 0.07331110550954995, "learning_rate": 0.00016607922141216667, "loss": 0.8473, "step": 81520 }, { "epoch": 1.4313804666514511, "grad_norm": 0.06708247996388103, "learning_rate": 0.000166070823475169, "loss": 0.8424, "step": 81530 }, { "epoch": 1.4315560315314524, "grad_norm": 0.12329089348265725, "learning_rate": 0.00016606242471371152, "loss": 0.8487, "step": 81540 }, { "epoch": 1.4317315964114539, "grad_norm": 0.05183586870098595, "learning_rate": 0.00016605402512790063, "loss": 0.8485, "step": 81550 }, { "epoch": 1.4319071612914551, "grad_norm": 0.0646207466466142, "learning_rate": 0.0001660456247178428, "loss": 0.8497, "step": 81560 }, { "epoch": 1.4320827261714566, "grad_norm": 0.06697027095790936, "learning_rate": 0.00016603722348364444, "loss": 0.8632, "step": 81570 }, { "epoch": 1.432258291051458, "grad_norm": 0.07446657797285865, "learning_rate": 0.00016602882142541196, "loss": 0.8451, "step": 81580 }, { "epoch": 1.4324338559314596, "grad_norm": 0.06607769473796238, "learning_rate": 0.0001660204185432519, "loss": 0.8513, "step": 81590 }, { "epoch": 1.4326094208114608, "grad_norm": 0.06242651524695068, "learning_rate": 0.00016601201483727065, "loss": 0.8479, "step": 81600 }, { "epoch": 1.4327849856914623, "grad_norm": 0.055657406672754275, "learning_rate": 0.00016600361030757473, "loss": 0.8542, "step": 81610 }, { "epoch": 1.4329605505714638, "grad_norm": 0.05284858111903712, "learning_rate": 0.0001659952049542706, "loss": 0.8506, "step": 81620 }, { "epoch": 1.433136115451465, "grad_norm": 0.04955510222906755, "learning_rate": 0.00016598679877746474, "loss": 0.8508, "step": 81630 }, { "epoch": 1.4333116803314665, "grad_norm": 0.07368767858491977, "learning_rate": 0.0001659783917772637, "loss": 0.8556, "step": 81640 }, { "epoch": 1.433487245211468, "grad_norm": 0.05556705233878034, "learning_rate": 0.00016596998395377398, "loss": 0.8486, "step": 81650 }, { "epoch": 1.4336628100914692, "grad_norm": 0.07948674157324008, "learning_rate": 0.00016596157530710212, "loss": 0.8501, "step": 81660 }, { "epoch": 1.4338383749714707, "grad_norm": 0.06648287269144297, "learning_rate": 0.00016595316583735466, "loss": 0.8414, "step": 81670 }, { "epoch": 1.4340139398514722, "grad_norm": 0.07041830993190713, "learning_rate": 0.00016594475554463812, "loss": 0.8492, "step": 81680 }, { "epoch": 1.4341895047314734, "grad_norm": 0.06885257675439539, "learning_rate": 0.0001659363444290591, "loss": 0.8503, "step": 81690 }, { "epoch": 1.434365069611475, "grad_norm": 0.051308979577393665, "learning_rate": 0.00016592793249072416, "loss": 0.8464, "step": 81700 }, { "epoch": 1.4345406344914764, "grad_norm": 0.04913027586347958, "learning_rate": 0.00016591951972973984, "loss": 0.8547, "step": 81710 }, { "epoch": 1.4347161993714777, "grad_norm": 0.05417085238021662, "learning_rate": 0.00016591110614621278, "loss": 0.8492, "step": 81720 }, { "epoch": 1.4348917642514791, "grad_norm": 0.05587054149041163, "learning_rate": 0.00016590269174024958, "loss": 0.8478, "step": 81730 }, { "epoch": 1.4350673291314806, "grad_norm": 0.048485853618943024, "learning_rate": 0.00016589427651195684, "loss": 0.848, "step": 81740 }, { "epoch": 1.4352428940114819, "grad_norm": 0.06471525365516678, "learning_rate": 0.00016588586046144114, "loss": 0.8413, "step": 81750 }, { "epoch": 1.4354184588914833, "grad_norm": 0.052107167711944624, "learning_rate": 0.00016587744358880917, "loss": 0.8508, "step": 81760 }, { "epoch": 1.4355940237714848, "grad_norm": 0.05682243999027412, "learning_rate": 0.00016586902589416755, "loss": 0.8512, "step": 81770 }, { "epoch": 1.4357695886514863, "grad_norm": 0.05891079161437684, "learning_rate": 0.00016586060737762296, "loss": 0.844, "step": 81780 }, { "epoch": 1.4359451535314876, "grad_norm": 0.06307454563568696, "learning_rate": 0.00016585218803928203, "loss": 0.8467, "step": 81790 }, { "epoch": 1.436120718411489, "grad_norm": 0.051401804872663034, "learning_rate": 0.00016584376787925144, "loss": 0.8426, "step": 81800 }, { "epoch": 1.4362962832914903, "grad_norm": 0.05362755633021735, "learning_rate": 0.00016583534689763792, "loss": 0.8457, "step": 81810 }, { "epoch": 1.4364718481714918, "grad_norm": 0.07928538027659685, "learning_rate": 0.00016582692509454808, "loss": 0.8518, "step": 81820 }, { "epoch": 1.4366474130514932, "grad_norm": 0.053000829459285345, "learning_rate": 0.0001658185024700887, "loss": 0.8527, "step": 81830 }, { "epoch": 1.4368229779314947, "grad_norm": 0.05562059742252245, "learning_rate": 0.00016581007902436646, "loss": 0.8547, "step": 81840 }, { "epoch": 1.436998542811496, "grad_norm": 0.053263224761995305, "learning_rate": 0.00016580165475748812, "loss": 0.8486, "step": 81850 }, { "epoch": 1.4371741076914974, "grad_norm": 0.06257231971634099, "learning_rate": 0.00016579322966956038, "loss": 0.8464, "step": 81860 }, { "epoch": 1.4373496725714987, "grad_norm": 0.0586297690994776, "learning_rate": 0.00016578480376069001, "loss": 0.8507, "step": 81870 }, { "epoch": 1.4375252374515002, "grad_norm": 0.06171643193441123, "learning_rate": 0.00016577637703098376, "loss": 0.8532, "step": 81880 }, { "epoch": 1.4377008023315017, "grad_norm": 0.05528167056162864, "learning_rate": 0.00016576794948054842, "loss": 0.8444, "step": 81890 }, { "epoch": 1.4378763672115031, "grad_norm": 0.06580397591747844, "learning_rate": 0.00016575952110949068, "loss": 0.8503, "step": 81900 }, { "epoch": 1.4380519320915044, "grad_norm": 0.0608182540680753, "learning_rate": 0.00016575109191791746, "loss": 0.8553, "step": 81910 }, { "epoch": 1.4382274969715059, "grad_norm": 0.05277243169276416, "learning_rate": 0.00016574266190593547, "loss": 0.8525, "step": 81920 }, { "epoch": 1.4384030618515071, "grad_norm": 0.11068214863161625, "learning_rate": 0.00016573423107365157, "loss": 0.8451, "step": 81930 }, { "epoch": 1.4385786267315086, "grad_norm": 0.06849025260690104, "learning_rate": 0.00016572579942117255, "loss": 0.8501, "step": 81940 }, { "epoch": 1.43875419161151, "grad_norm": 0.04822011983380511, "learning_rate": 0.0001657173669486052, "loss": 0.85, "step": 81950 }, { "epoch": 1.4389297564915116, "grad_norm": 0.06489428690518545, "learning_rate": 0.00016570893365605647, "loss": 0.8444, "step": 81960 }, { "epoch": 1.4391053213715128, "grad_norm": 0.06402113775475579, "learning_rate": 0.00016570049954363312, "loss": 0.8529, "step": 81970 }, { "epoch": 1.4392808862515143, "grad_norm": 0.04553408421155728, "learning_rate": 0.00016569206461144204, "loss": 0.8465, "step": 81980 }, { "epoch": 1.4394564511315155, "grad_norm": 0.05634814547259967, "learning_rate": 0.0001656836288595901, "loss": 0.8497, "step": 81990 }, { "epoch": 1.439632016011517, "grad_norm": 0.06299148352068196, "learning_rate": 0.00016567519228818418, "loss": 0.8451, "step": 82000 }, { "epoch": 1.4398075808915185, "grad_norm": 0.05339113141962948, "learning_rate": 0.0001656667548973312, "loss": 0.8486, "step": 82010 }, { "epoch": 1.43998314577152, "grad_norm": 0.06741599750643325, "learning_rate": 0.00016565831668713803, "loss": 0.8467, "step": 82020 }, { "epoch": 1.4401587106515212, "grad_norm": 0.06613869107629984, "learning_rate": 0.0001656498776577116, "loss": 0.8427, "step": 82030 }, { "epoch": 1.4403342755315227, "grad_norm": 0.05106372330010516, "learning_rate": 0.0001656414378091588, "loss": 0.8454, "step": 82040 }, { "epoch": 1.440509840411524, "grad_norm": 0.04480645203991791, "learning_rate": 0.00016563299714158664, "loss": 0.8476, "step": 82050 }, { "epoch": 1.4406854052915254, "grad_norm": 0.0497163642930676, "learning_rate": 0.000165624555655102, "loss": 0.8427, "step": 82060 }, { "epoch": 1.440860970171527, "grad_norm": 0.04155836953612483, "learning_rate": 0.00016561611334981184, "loss": 0.8457, "step": 82070 }, { "epoch": 1.4410365350515284, "grad_norm": 0.051953023074655436, "learning_rate": 0.00016560767022582318, "loss": 0.8427, "step": 82080 }, { "epoch": 1.4412120999315297, "grad_norm": 0.05988964207114093, "learning_rate": 0.00016559922628324293, "loss": 0.8527, "step": 82090 }, { "epoch": 1.4413876648115311, "grad_norm": 0.0632986658250819, "learning_rate": 0.00016559078152217813, "loss": 0.8499, "step": 82100 }, { "epoch": 1.4415632296915324, "grad_norm": 0.05574400948195976, "learning_rate": 0.00016558233594273568, "loss": 0.8461, "step": 82110 }, { "epoch": 1.4417387945715339, "grad_norm": 0.06335068320433018, "learning_rate": 0.0001655738895450227, "loss": 0.8537, "step": 82120 }, { "epoch": 1.4419143594515353, "grad_norm": 0.052641206780308286, "learning_rate": 0.0001655654423291462, "loss": 0.8559, "step": 82130 }, { "epoch": 1.4420899243315368, "grad_norm": 0.07566180787884327, "learning_rate": 0.0001655569942952131, "loss": 0.8503, "step": 82140 }, { "epoch": 1.442265489211538, "grad_norm": 0.05021101734129216, "learning_rate": 0.0001655485454433306, "loss": 0.8512, "step": 82150 }, { "epoch": 1.4424410540915396, "grad_norm": 0.06673553609808773, "learning_rate": 0.00016554009577360556, "loss": 0.8487, "step": 82160 }, { "epoch": 1.4426166189715408, "grad_norm": 0.05936777009905232, "learning_rate": 0.0001655316452861452, "loss": 0.8523, "step": 82170 }, { "epoch": 1.4427921838515423, "grad_norm": 0.049148136786596096, "learning_rate": 0.0001655231939810565, "loss": 0.8485, "step": 82180 }, { "epoch": 1.4429677487315438, "grad_norm": 0.056237701206464846, "learning_rate": 0.00016551474185844657, "loss": 0.8498, "step": 82190 }, { "epoch": 1.4431433136115452, "grad_norm": 0.053333551322937586, "learning_rate": 0.00016550628891842252, "loss": 0.8442, "step": 82200 }, { "epoch": 1.4433188784915465, "grad_norm": 0.05332003937787498, "learning_rate": 0.0001654978351610914, "loss": 0.8586, "step": 82210 }, { "epoch": 1.443494443371548, "grad_norm": 0.0622895199246092, "learning_rate": 0.00016548938058656037, "loss": 0.8484, "step": 82220 }, { "epoch": 1.4436700082515492, "grad_norm": 0.05646201757485554, "learning_rate": 0.0001654809251949365, "loss": 0.848, "step": 82230 }, { "epoch": 1.4438455731315507, "grad_norm": 0.0586135558777663, "learning_rate": 0.00016547246898632696, "loss": 0.8499, "step": 82240 }, { "epoch": 1.4440211380115522, "grad_norm": 0.05858635713115686, "learning_rate": 0.00016546401196083888, "loss": 0.851, "step": 82250 }, { "epoch": 1.4441967028915537, "grad_norm": 0.05669222807484035, "learning_rate": 0.00016545555411857945, "loss": 0.8527, "step": 82260 }, { "epoch": 1.444372267771555, "grad_norm": 0.05294480643399446, "learning_rate": 0.00016544709545965577, "loss": 0.847, "step": 82270 }, { "epoch": 1.4445478326515564, "grad_norm": 0.0701926658759587, "learning_rate": 0.00016543863598417503, "loss": 0.8379, "step": 82280 }, { "epoch": 1.4447233975315577, "grad_norm": 0.05516968519372344, "learning_rate": 0.00016543017569224444, "loss": 0.8517, "step": 82290 }, { "epoch": 1.4448989624115591, "grad_norm": 0.049788854389740754, "learning_rate": 0.00016542171458397117, "loss": 0.8547, "step": 82300 }, { "epoch": 1.4450745272915606, "grad_norm": 0.08829046259936403, "learning_rate": 0.0001654132526594624, "loss": 0.8536, "step": 82310 }, { "epoch": 1.445250092171562, "grad_norm": 0.05105319996191013, "learning_rate": 0.0001654047899188254, "loss": 0.8539, "step": 82320 }, { "epoch": 1.4454256570515633, "grad_norm": 0.06952533071417957, "learning_rate": 0.00016539632636216738, "loss": 0.847, "step": 82330 }, { "epoch": 1.4456012219315648, "grad_norm": 0.06139622014626932, "learning_rate": 0.00016538786198959555, "loss": 0.8522, "step": 82340 }, { "epoch": 1.4457767868115663, "grad_norm": 0.0751115200031959, "learning_rate": 0.00016537939680121716, "loss": 0.8499, "step": 82350 }, { "epoch": 1.4459523516915676, "grad_norm": 0.051252937127982914, "learning_rate": 0.00016537093079713948, "loss": 0.8476, "step": 82360 }, { "epoch": 1.446127916571569, "grad_norm": 0.09184272665107596, "learning_rate": 0.0001653624639774698, "loss": 0.8494, "step": 82370 }, { "epoch": 1.4463034814515705, "grad_norm": 0.09774901348923852, "learning_rate": 0.00016535399634231532, "loss": 0.8535, "step": 82380 }, { "epoch": 1.4464790463315718, "grad_norm": 0.09097648279859763, "learning_rate": 0.00016534552789178338, "loss": 0.8435, "step": 82390 }, { "epoch": 1.4466546112115732, "grad_norm": 0.05205209323808941, "learning_rate": 0.00016533705862598131, "loss": 0.8482, "step": 82400 }, { "epoch": 1.4468301760915747, "grad_norm": 0.06821978544268174, "learning_rate": 0.00016532858854501634, "loss": 0.8443, "step": 82410 }, { "epoch": 1.447005740971576, "grad_norm": 0.05651481654830118, "learning_rate": 0.00016532011764899585, "loss": 0.8462, "step": 82420 }, { "epoch": 1.4471813058515774, "grad_norm": 0.059524699556812076, "learning_rate": 0.0001653116459380271, "loss": 0.8511, "step": 82430 }, { "epoch": 1.447356870731579, "grad_norm": 0.05181151595439115, "learning_rate": 0.00016530317341221753, "loss": 0.8556, "step": 82440 }, { "epoch": 1.4475324356115802, "grad_norm": 0.05962709418244785, "learning_rate": 0.00016529470007167437, "loss": 0.8528, "step": 82450 }, { "epoch": 1.4477080004915817, "grad_norm": 0.055199076211060974, "learning_rate": 0.0001652862259165051, "loss": 0.8418, "step": 82460 }, { "epoch": 1.4478835653715831, "grad_norm": 0.06690756759506102, "learning_rate": 0.000165277750946817, "loss": 0.8435, "step": 82470 }, { "epoch": 1.4480591302515844, "grad_norm": 0.07657314676329585, "learning_rate": 0.00016526927516271743, "loss": 0.8501, "step": 82480 }, { "epoch": 1.4482346951315859, "grad_norm": 0.04764099759795592, "learning_rate": 0.00016526079856431388, "loss": 0.8561, "step": 82490 }, { "epoch": 1.4484102600115873, "grad_norm": 0.05729134826122519, "learning_rate": 0.00016525232115171368, "loss": 0.8503, "step": 82500 }, { "epoch": 1.4485858248915888, "grad_norm": 0.058505868935257097, "learning_rate": 0.00016524384292502427, "loss": 0.841, "step": 82510 }, { "epoch": 1.44876138977159, "grad_norm": 0.05474617460730219, "learning_rate": 0.00016523536388435303, "loss": 0.8469, "step": 82520 }, { "epoch": 1.4489369546515916, "grad_norm": 0.0572850499550895, "learning_rate": 0.00016522688402980743, "loss": 0.853, "step": 82530 }, { "epoch": 1.4491125195315928, "grad_norm": 0.07818363016703796, "learning_rate": 0.00016521840336149494, "loss": 0.8446, "step": 82540 }, { "epoch": 1.4492880844115943, "grad_norm": 0.05875160174067679, "learning_rate": 0.0001652099218795229, "loss": 0.8478, "step": 82550 }, { "epoch": 1.4494636492915958, "grad_norm": 0.06557796067218928, "learning_rate": 0.0001652014395839989, "loss": 0.8449, "step": 82560 }, { "epoch": 1.4496392141715972, "grad_norm": 0.0712333135236993, "learning_rate": 0.00016519295647503038, "loss": 0.8523, "step": 82570 }, { "epoch": 1.4498147790515985, "grad_norm": 0.06038061081455513, "learning_rate": 0.0001651844725527247, "loss": 0.857, "step": 82580 }, { "epoch": 1.4499903439316, "grad_norm": 0.08669381286246113, "learning_rate": 0.00016517598781718953, "loss": 0.8498, "step": 82590 }, { "epoch": 1.4501659088116012, "grad_norm": 0.0728078319195362, "learning_rate": 0.0001651675022685323, "loss": 0.854, "step": 82600 }, { "epoch": 1.4503414736916027, "grad_norm": 0.046159052408452084, "learning_rate": 0.00016515901590686047, "loss": 0.8519, "step": 82610 }, { "epoch": 1.4505170385716042, "grad_norm": 0.06888371081698937, "learning_rate": 0.00016515052873228162, "loss": 0.8483, "step": 82620 }, { "epoch": 1.4506926034516057, "grad_norm": 0.07636135616082246, "learning_rate": 0.00016514204074490329, "loss": 0.8507, "step": 82630 }, { "epoch": 1.450868168331607, "grad_norm": 0.07229532162430803, "learning_rate": 0.00016513355194483305, "loss": 0.848, "step": 82640 }, { "epoch": 1.4510437332116084, "grad_norm": 0.10631567242050408, "learning_rate": 0.00016512506233217836, "loss": 0.8419, "step": 82650 }, { "epoch": 1.4512192980916097, "grad_norm": 0.0545622194481474, "learning_rate": 0.0001651165719070469, "loss": 0.8461, "step": 82660 }, { "epoch": 1.4513948629716111, "grad_norm": 0.0435817354886127, "learning_rate": 0.0001651080806695461, "loss": 0.8475, "step": 82670 }, { "epoch": 1.4515704278516126, "grad_norm": 0.05249490232378303, "learning_rate": 0.0001650995886197837, "loss": 0.8462, "step": 82680 }, { "epoch": 1.451745992731614, "grad_norm": 0.05195188826090613, "learning_rate": 0.0001650910957578672, "loss": 0.8448, "step": 82690 }, { "epoch": 1.4519215576116153, "grad_norm": 0.06809873419284677, "learning_rate": 0.00016508260208390423, "loss": 0.8518, "step": 82700 }, { "epoch": 1.4520971224916168, "grad_norm": 0.058655186888511575, "learning_rate": 0.00016507410759800245, "loss": 0.8466, "step": 82710 }, { "epoch": 1.452272687371618, "grad_norm": 0.07326411862899015, "learning_rate": 0.0001650656123002694, "loss": 0.8538, "step": 82720 }, { "epoch": 1.4524482522516196, "grad_norm": 0.05380323163386629, "learning_rate": 0.00016505711619081278, "loss": 0.8532, "step": 82730 }, { "epoch": 1.452623817131621, "grad_norm": 0.08144974454351225, "learning_rate": 0.00016504861926974022, "loss": 0.8559, "step": 82740 }, { "epoch": 1.4527993820116225, "grad_norm": 0.047155717114269864, "learning_rate": 0.00016504012153715938, "loss": 0.8515, "step": 82750 }, { "epoch": 1.4529749468916238, "grad_norm": 0.0737277422789445, "learning_rate": 0.00016503162299317793, "loss": 0.8467, "step": 82760 }, { "epoch": 1.4531505117716252, "grad_norm": 0.05078842601942875, "learning_rate": 0.00016502312363790353, "loss": 0.8471, "step": 82770 }, { "epoch": 1.4533260766516265, "grad_norm": 0.05518370818999657, "learning_rate": 0.0001650146234714439, "loss": 0.8456, "step": 82780 }, { "epoch": 1.453501641531628, "grad_norm": 0.06089900147026913, "learning_rate": 0.0001650061224939067, "loss": 0.857, "step": 82790 }, { "epoch": 1.4536772064116295, "grad_norm": 0.07948722631877808, "learning_rate": 0.0001649976207053997, "loss": 0.8549, "step": 82800 }, { "epoch": 1.453852771291631, "grad_norm": 0.049523388940105544, "learning_rate": 0.00016498911810603059, "loss": 0.8482, "step": 82810 }, { "epoch": 1.4540283361716322, "grad_norm": 0.08741808458519698, "learning_rate": 0.00016498061469590703, "loss": 0.8396, "step": 82820 }, { "epoch": 1.4542039010516337, "grad_norm": 0.056388813095355005, "learning_rate": 0.00016497211047513687, "loss": 0.8462, "step": 82830 }, { "epoch": 1.454379465931635, "grad_norm": 0.04713287934280101, "learning_rate": 0.00016496360544382782, "loss": 0.845, "step": 82840 }, { "epoch": 1.4545550308116364, "grad_norm": 0.06428904949149719, "learning_rate": 0.00016495509960208756, "loss": 0.8456, "step": 82850 }, { "epoch": 1.4547305956916379, "grad_norm": 0.053069493654574175, "learning_rate": 0.000164946592950024, "loss": 0.8465, "step": 82860 }, { "epoch": 1.4549061605716394, "grad_norm": 0.05758706400883537, "learning_rate": 0.0001649380854877448, "loss": 0.8516, "step": 82870 }, { "epoch": 1.4550817254516406, "grad_norm": 0.08819547233180827, "learning_rate": 0.00016492957721535783, "loss": 0.8464, "step": 82880 }, { "epoch": 1.455257290331642, "grad_norm": 0.05788796384935592, "learning_rate": 0.00016492106813297087, "loss": 0.8433, "step": 82890 }, { "epoch": 1.4554328552116433, "grad_norm": 0.06512481571733301, "learning_rate": 0.0001649125582406917, "loss": 0.8545, "step": 82900 }, { "epoch": 1.4556084200916448, "grad_norm": 0.07420216935785456, "learning_rate": 0.0001649040475386282, "loss": 0.8407, "step": 82910 }, { "epoch": 1.4557839849716463, "grad_norm": 0.06678653818312046, "learning_rate": 0.00016489553602688813, "loss": 0.8525, "step": 82920 }, { "epoch": 1.4559595498516478, "grad_norm": 0.05470701464128382, "learning_rate": 0.00016488702370557942, "loss": 0.8531, "step": 82930 }, { "epoch": 1.456135114731649, "grad_norm": 0.07499048019279897, "learning_rate": 0.00016487851057480986, "loss": 0.8586, "step": 82940 }, { "epoch": 1.4563106796116505, "grad_norm": 0.06814406292430976, "learning_rate": 0.00016486999663468732, "loss": 0.8497, "step": 82950 }, { "epoch": 1.4564862444916518, "grad_norm": 0.044869614866475845, "learning_rate": 0.00016486148188531969, "loss": 0.8446, "step": 82960 }, { "epoch": 1.4566618093716532, "grad_norm": 0.056202092827752285, "learning_rate": 0.00016485296632681487, "loss": 0.8447, "step": 82970 }, { "epoch": 1.4568373742516547, "grad_norm": 0.059034832560779016, "learning_rate": 0.0001648444499592807, "loss": 0.8502, "step": 82980 }, { "epoch": 1.4570129391316562, "grad_norm": 0.059779592427453405, "learning_rate": 0.0001648359327828251, "loss": 0.8494, "step": 82990 }, { "epoch": 1.4571885040116574, "grad_norm": 0.06370818222399202, "learning_rate": 0.00016482741479755602, "loss": 0.8525, "step": 83000 }, { "epoch": 1.457364068891659, "grad_norm": 0.06487183681948328, "learning_rate": 0.00016481889600358134, "loss": 0.8522, "step": 83010 }, { "epoch": 1.4575396337716602, "grad_norm": 0.04940061312054276, "learning_rate": 0.00016481037640100904, "loss": 0.8608, "step": 83020 }, { "epoch": 1.4577151986516617, "grad_norm": 0.0693936519404394, "learning_rate": 0.000164801855989947, "loss": 0.854, "step": 83030 }, { "epoch": 1.4578907635316631, "grad_norm": 0.05501299495831504, "learning_rate": 0.00016479333477050328, "loss": 0.8432, "step": 83040 }, { "epoch": 1.4580663284116646, "grad_norm": 0.05228364267367111, "learning_rate": 0.00016478481274278574, "loss": 0.8465, "step": 83050 }, { "epoch": 1.4582418932916659, "grad_norm": 0.06473960932345034, "learning_rate": 0.0001647762899069024, "loss": 0.8513, "step": 83060 }, { "epoch": 1.4584174581716673, "grad_norm": 0.07384947836707849, "learning_rate": 0.0001647677662629613, "loss": 0.8414, "step": 83070 }, { "epoch": 1.4585930230516688, "grad_norm": 0.06715855929886091, "learning_rate": 0.00016475924181107033, "loss": 0.8422, "step": 83080 }, { "epoch": 1.45876858793167, "grad_norm": 0.0756824368167195, "learning_rate": 0.00016475071655133755, "loss": 0.8562, "step": 83090 }, { "epoch": 1.4589441528116716, "grad_norm": 0.061943030296957184, "learning_rate": 0.00016474219048387098, "loss": 0.8417, "step": 83100 }, { "epoch": 1.459119717691673, "grad_norm": 0.07601170316511748, "learning_rate": 0.00016473366360877863, "loss": 0.8438, "step": 83110 }, { "epoch": 1.4592952825716743, "grad_norm": 0.05867038056409399, "learning_rate": 0.00016472513592616857, "loss": 0.851, "step": 83120 }, { "epoch": 1.4594708474516758, "grad_norm": 0.07655701596646221, "learning_rate": 0.00016471660743614877, "loss": 0.8446, "step": 83130 }, { "epoch": 1.4596464123316772, "grad_norm": 0.0695438356911637, "learning_rate": 0.0001647080781388274, "loss": 0.8454, "step": 83140 }, { "epoch": 1.4598219772116785, "grad_norm": 0.058979625055680766, "learning_rate": 0.00016469954803431243, "loss": 0.8379, "step": 83150 }, { "epoch": 1.45999754209168, "grad_norm": 0.06308580621163681, "learning_rate": 0.00016469101712271203, "loss": 0.8465, "step": 83160 }, { "epoch": 1.4601731069716815, "grad_norm": 0.07146374327914055, "learning_rate": 0.0001646824854041342, "loss": 0.8454, "step": 83170 }, { "epoch": 1.4603486718516827, "grad_norm": 0.05678535993778328, "learning_rate": 0.0001646739528786871, "loss": 0.8499, "step": 83180 }, { "epoch": 1.4605242367316842, "grad_norm": 0.05258110863697477, "learning_rate": 0.00016466541954647876, "loss": 0.8541, "step": 83190 }, { "epoch": 1.4606998016116857, "grad_norm": 0.05367740198889225, "learning_rate": 0.0001646568854076174, "loss": 0.854, "step": 83200 }, { "epoch": 1.460875366491687, "grad_norm": 0.08222962544632216, "learning_rate": 0.0001646483504622111, "loss": 0.8505, "step": 83210 }, { "epoch": 1.4610509313716884, "grad_norm": 0.056274129687850644, "learning_rate": 0.00016463981471036797, "loss": 0.8462, "step": 83220 }, { "epoch": 1.4612264962516899, "grad_norm": 0.06012879764926438, "learning_rate": 0.0001646312781521962, "loss": 0.8455, "step": 83230 }, { "epoch": 1.4614020611316914, "grad_norm": 0.059232792482478355, "learning_rate": 0.00016462274078780402, "loss": 0.8485, "step": 83240 }, { "epoch": 1.4615776260116926, "grad_norm": 0.06122545405959567, "learning_rate": 0.0001646142026172994, "loss": 0.8442, "step": 83250 }, { "epoch": 1.461753190891694, "grad_norm": 0.0705622437425045, "learning_rate": 0.00016460566364079072, "loss": 0.8513, "step": 83260 }, { "epoch": 1.4619287557716953, "grad_norm": 0.05179115137758072, "learning_rate": 0.00016459712385838608, "loss": 0.8502, "step": 83270 }, { "epoch": 1.4621043206516968, "grad_norm": 0.05473910442649749, "learning_rate": 0.0001645885832701937, "loss": 0.854, "step": 83280 }, { "epoch": 1.4622798855316983, "grad_norm": 0.055243334565992716, "learning_rate": 0.00016458004187632177, "loss": 0.8448, "step": 83290 }, { "epoch": 1.4624554504116998, "grad_norm": 0.09460163290911132, "learning_rate": 0.00016457149967687848, "loss": 0.8504, "step": 83300 }, { "epoch": 1.462631015291701, "grad_norm": 0.0633446499842625, "learning_rate": 0.00016456295667197215, "loss": 0.8471, "step": 83310 }, { "epoch": 1.4628065801717025, "grad_norm": 0.06357857543201693, "learning_rate": 0.000164554412861711, "loss": 0.845, "step": 83320 }, { "epoch": 1.4629821450517038, "grad_norm": 0.07747755809653474, "learning_rate": 0.00016454586824620322, "loss": 0.8483, "step": 83330 }, { "epoch": 1.4631577099317052, "grad_norm": 0.07020983443058701, "learning_rate": 0.0001645373228255571, "loss": 0.852, "step": 83340 }, { "epoch": 1.4633332748117067, "grad_norm": 0.05860229914707375, "learning_rate": 0.00016452877659988094, "loss": 0.8515, "step": 83350 }, { "epoch": 1.4635088396917082, "grad_norm": 0.06203618615569894, "learning_rate": 0.00016452022956928302, "loss": 0.8469, "step": 83360 }, { "epoch": 1.4636844045717095, "grad_norm": 0.06011692296326116, "learning_rate": 0.0001645116817338716, "loss": 0.8496, "step": 83370 }, { "epoch": 1.463859969451711, "grad_norm": 0.06084773890408523, "learning_rate": 0.00016450313309375502, "loss": 0.8456, "step": 83380 }, { "epoch": 1.4640355343317122, "grad_norm": 0.0498935081040404, "learning_rate": 0.0001644945836490416, "loss": 0.8518, "step": 83390 }, { "epoch": 1.4642110992117137, "grad_norm": 0.06991196831058599, "learning_rate": 0.00016448603339983955, "loss": 0.8595, "step": 83400 }, { "epoch": 1.4643866640917151, "grad_norm": 0.06999009353745347, "learning_rate": 0.00016447748234625734, "loss": 0.8508, "step": 83410 }, { "epoch": 1.4645622289717166, "grad_norm": 0.06670348182021099, "learning_rate": 0.00016446893048840329, "loss": 0.8389, "step": 83420 }, { "epoch": 1.4647377938517179, "grad_norm": 0.05261730885533947, "learning_rate": 0.00016446037782638566, "loss": 0.8468, "step": 83430 }, { "epoch": 1.4649133587317194, "grad_norm": 0.10406067017673917, "learning_rate": 0.00016445182436031295, "loss": 0.8428, "step": 83440 }, { "epoch": 1.4650889236117206, "grad_norm": 0.08351982728278491, "learning_rate": 0.00016444327009029343, "loss": 0.8501, "step": 83450 }, { "epoch": 1.465264488491722, "grad_norm": 0.0600436719476642, "learning_rate": 0.0001644347150164355, "loss": 0.8441, "step": 83460 }, { "epoch": 1.4654400533717236, "grad_norm": 0.04789150846823069, "learning_rate": 0.0001644261591388476, "loss": 0.848, "step": 83470 }, { "epoch": 1.465615618251725, "grad_norm": 0.04636475912563549, "learning_rate": 0.00016441760245763812, "loss": 0.8451, "step": 83480 }, { "epoch": 1.4657911831317263, "grad_norm": 0.05750500373571481, "learning_rate": 0.00016440904497291544, "loss": 0.8528, "step": 83490 }, { "epoch": 1.4659667480117278, "grad_norm": 0.05366277473859499, "learning_rate": 0.00016440048668478802, "loss": 0.8489, "step": 83500 }, { "epoch": 1.466142312891729, "grad_norm": 0.0624007484429861, "learning_rate": 0.0001643919275933643, "loss": 0.8443, "step": 83510 }, { "epoch": 1.4663178777717305, "grad_norm": 0.05469634577681651, "learning_rate": 0.00016438336769875267, "loss": 0.8476, "step": 83520 }, { "epoch": 1.466493442651732, "grad_norm": 0.05363039040125383, "learning_rate": 0.00016437480700106163, "loss": 0.8457, "step": 83530 }, { "epoch": 1.4666690075317335, "grad_norm": 0.05134204952727971, "learning_rate": 0.00016436624550039965, "loss": 0.8475, "step": 83540 }, { "epoch": 1.4668445724117347, "grad_norm": 0.08867293772960044, "learning_rate": 0.0001643576831968752, "loss": 0.8498, "step": 83550 }, { "epoch": 1.4670201372917362, "grad_norm": 0.050583944348932146, "learning_rate": 0.00016434912009059675, "loss": 0.8548, "step": 83560 }, { "epoch": 1.4671957021717374, "grad_norm": 0.05833836105327453, "learning_rate": 0.0001643405561816728, "loss": 0.8536, "step": 83570 }, { "epoch": 1.467371267051739, "grad_norm": 0.08186630579267011, "learning_rate": 0.0001643319914702119, "loss": 0.8474, "step": 83580 }, { "epoch": 1.4675468319317404, "grad_norm": 0.07892507111381213, "learning_rate": 0.0001643234259563225, "loss": 0.8421, "step": 83590 }, { "epoch": 1.4677223968117419, "grad_norm": 0.06979043911993592, "learning_rate": 0.00016431485964011318, "loss": 0.8487, "step": 83600 }, { "epoch": 1.4678979616917431, "grad_norm": 0.05480735524762101, "learning_rate": 0.00016430629252169244, "loss": 0.847, "step": 83610 }, { "epoch": 1.4680735265717446, "grad_norm": 0.09894346199036219, "learning_rate": 0.00016429772460116882, "loss": 0.8524, "step": 83620 }, { "epoch": 1.4682490914517459, "grad_norm": 0.056376703250638475, "learning_rate": 0.00016428915587865091, "loss": 0.8511, "step": 83630 }, { "epoch": 1.4684246563317473, "grad_norm": 0.07990472456030612, "learning_rate": 0.0001642805863542473, "loss": 0.8536, "step": 83640 }, { "epoch": 1.4686002212117488, "grad_norm": 0.06913545581843061, "learning_rate": 0.0001642720160280665, "loss": 0.847, "step": 83650 }, { "epoch": 1.4687757860917503, "grad_norm": 0.0579034662810748, "learning_rate": 0.00016426344490021714, "loss": 0.8586, "step": 83660 }, { "epoch": 1.4689513509717516, "grad_norm": 0.09994456265800757, "learning_rate": 0.00016425487297080783, "loss": 0.8463, "step": 83670 }, { "epoch": 1.469126915851753, "grad_norm": 0.07106796846528536, "learning_rate": 0.0001642463002399471, "loss": 0.8495, "step": 83680 }, { "epoch": 1.4693024807317543, "grad_norm": 0.07993415696982449, "learning_rate": 0.0001642377267077437, "loss": 0.8547, "step": 83690 }, { "epoch": 1.4694780456117558, "grad_norm": 0.061182466852569464, "learning_rate": 0.00016422915237430613, "loss": 0.8485, "step": 83700 }, { "epoch": 1.4696536104917572, "grad_norm": 0.05323573383754816, "learning_rate": 0.0001642205772397431, "loss": 0.8568, "step": 83710 }, { "epoch": 1.4698291753717587, "grad_norm": 0.07028395777526043, "learning_rate": 0.00016421200130416322, "loss": 0.8525, "step": 83720 }, { "epoch": 1.47000474025176, "grad_norm": 0.04755331155984356, "learning_rate": 0.00016420342456767518, "loss": 0.8558, "step": 83730 }, { "epoch": 1.4701803051317615, "grad_norm": 0.06387308413249157, "learning_rate": 0.00016419484703038766, "loss": 0.8487, "step": 83740 }, { "epoch": 1.4703558700117627, "grad_norm": 0.06563129726165447, "learning_rate": 0.0001641862686924093, "loss": 0.8415, "step": 83750 }, { "epoch": 1.4705314348917642, "grad_norm": 0.05237930419296945, "learning_rate": 0.0001641776895538488, "loss": 0.8482, "step": 83760 }, { "epoch": 1.4707069997717657, "grad_norm": 0.0489651462929584, "learning_rate": 0.00016416910961481487, "loss": 0.8493, "step": 83770 }, { "epoch": 1.4708825646517671, "grad_norm": 0.046624895007500146, "learning_rate": 0.00016416052887541623, "loss": 0.8442, "step": 83780 }, { "epoch": 1.4710581295317684, "grad_norm": 0.05797369646396325, "learning_rate": 0.00016415194733576156, "loss": 0.8447, "step": 83790 }, { "epoch": 1.4712336944117699, "grad_norm": 0.05295624759712933, "learning_rate": 0.00016414336499595965, "loss": 0.8472, "step": 83800 }, { "epoch": 1.4714092592917714, "grad_norm": 0.08413271679331502, "learning_rate": 0.00016413478185611915, "loss": 0.8482, "step": 83810 }, { "epoch": 1.4715848241717726, "grad_norm": 0.07818568793907567, "learning_rate": 0.00016412619791634887, "loss": 0.8395, "step": 83820 }, { "epoch": 1.471760389051774, "grad_norm": 0.059634263072868406, "learning_rate": 0.0001641176131767576, "loss": 0.8433, "step": 83830 }, { "epoch": 1.4719359539317756, "grad_norm": 0.05589605812794592, "learning_rate": 0.00016410902763745405, "loss": 0.8559, "step": 83840 }, { "epoch": 1.4721115188117768, "grad_norm": 0.061234815576302074, "learning_rate": 0.00016410044129854708, "loss": 0.8465, "step": 83850 }, { "epoch": 1.4722870836917783, "grad_norm": 0.07052593468086003, "learning_rate": 0.00016409185416014534, "loss": 0.8444, "step": 83860 }, { "epoch": 1.4724626485717798, "grad_norm": 0.0522579041376801, "learning_rate": 0.00016408326622235778, "loss": 0.8549, "step": 83870 }, { "epoch": 1.472638213451781, "grad_norm": 0.06310910903587647, "learning_rate": 0.00016407467748529313, "loss": 0.8511, "step": 83880 }, { "epoch": 1.4728137783317825, "grad_norm": 0.0832827627986753, "learning_rate": 0.0001640660879490602, "loss": 0.8445, "step": 83890 }, { "epoch": 1.472989343211784, "grad_norm": 0.0677492593520059, "learning_rate": 0.00016405749761376788, "loss": 0.8544, "step": 83900 }, { "epoch": 1.4731649080917852, "grad_norm": 0.07750607024301977, "learning_rate": 0.00016404890647952496, "loss": 0.8475, "step": 83910 }, { "epoch": 1.4733404729717867, "grad_norm": 0.05787392720430598, "learning_rate": 0.00016404031454644036, "loss": 0.8527, "step": 83920 }, { "epoch": 1.4735160378517882, "grad_norm": 0.053864694781824596, "learning_rate": 0.00016403172181462286, "loss": 0.8405, "step": 83930 }, { "epoch": 1.4736916027317895, "grad_norm": 0.05432627195765173, "learning_rate": 0.0001640231282841813, "loss": 0.8486, "step": 83940 }, { "epoch": 1.473867167611791, "grad_norm": 0.07757548115447277, "learning_rate": 0.0001640145339552247, "loss": 0.8471, "step": 83950 }, { "epoch": 1.4740427324917924, "grad_norm": 0.05999409516123081, "learning_rate": 0.00016400593882786189, "loss": 0.8434, "step": 83960 }, { "epoch": 1.4742182973717939, "grad_norm": 0.08409206617593677, "learning_rate": 0.00016399734290220176, "loss": 0.8476, "step": 83970 }, { "epoch": 1.4743938622517951, "grad_norm": 0.05432163849708818, "learning_rate": 0.00016398874617835319, "loss": 0.845, "step": 83980 }, { "epoch": 1.4745694271317966, "grad_norm": 0.05949145528716372, "learning_rate": 0.00016398014865642513, "loss": 0.8517, "step": 83990 }, { "epoch": 1.4747449920117979, "grad_norm": 0.05583554594522306, "learning_rate": 0.00016397155033652652, "loss": 0.8527, "step": 84000 }, { "epoch": 1.4749205568917994, "grad_norm": 0.06418454773620946, "learning_rate": 0.00016396295121876632, "loss": 0.8446, "step": 84010 }, { "epoch": 1.4750961217718008, "grad_norm": 0.06976343582211729, "learning_rate": 0.00016395435130325348, "loss": 0.8568, "step": 84020 }, { "epoch": 1.4752716866518023, "grad_norm": 0.05664942856251392, "learning_rate": 0.00016394575059009686, "loss": 0.8449, "step": 84030 }, { "epoch": 1.4754472515318036, "grad_norm": 0.054188841288438074, "learning_rate": 0.00016393714907940557, "loss": 0.845, "step": 84040 }, { "epoch": 1.475622816411805, "grad_norm": 0.0809605667133745, "learning_rate": 0.00016392854677128857, "loss": 0.8454, "step": 84050 }, { "epoch": 1.4757983812918063, "grad_norm": 0.0611854658646288, "learning_rate": 0.00016391994366585477, "loss": 0.8489, "step": 84060 }, { "epoch": 1.4759739461718078, "grad_norm": 0.07558577034626296, "learning_rate": 0.00016391133976321326, "loss": 0.849, "step": 84070 }, { "epoch": 1.4761495110518092, "grad_norm": 0.05818857331538572, "learning_rate": 0.000163902735063473, "loss": 0.8486, "step": 84080 }, { "epoch": 1.4763250759318107, "grad_norm": 0.0704530965842298, "learning_rate": 0.00016389412956674304, "loss": 0.8499, "step": 84090 }, { "epoch": 1.476500640811812, "grad_norm": 0.06953434139112732, "learning_rate": 0.0001638855232731324, "loss": 0.8502, "step": 84100 }, { "epoch": 1.4766762056918135, "grad_norm": 0.06513762206350118, "learning_rate": 0.00016387691618275014, "loss": 0.8445, "step": 84110 }, { "epoch": 1.4768517705718147, "grad_norm": 0.06906031369162255, "learning_rate": 0.00016386830829570527, "loss": 0.8527, "step": 84120 }, { "epoch": 1.4770273354518162, "grad_norm": 0.0640613637784772, "learning_rate": 0.0001638596996121069, "loss": 0.839, "step": 84130 }, { "epoch": 1.4772029003318177, "grad_norm": 0.06869763897199568, "learning_rate": 0.0001638510901320641, "loss": 0.8441, "step": 84140 }, { "epoch": 1.4773784652118191, "grad_norm": 0.04911610700239731, "learning_rate": 0.00016384247985568596, "loss": 0.8398, "step": 84150 }, { "epoch": 1.4775540300918204, "grad_norm": 0.04870044223991355, "learning_rate": 0.0001638338687830815, "loss": 0.8443, "step": 84160 }, { "epoch": 1.4777295949718219, "grad_norm": 0.06476066285295365, "learning_rate": 0.00016382525691435991, "loss": 0.8521, "step": 84170 }, { "epoch": 1.4779051598518231, "grad_norm": 0.05703805239549158, "learning_rate": 0.0001638166442496303, "loss": 0.8503, "step": 84180 }, { "epoch": 1.4780807247318246, "grad_norm": 0.04973424567291954, "learning_rate": 0.00016380803078900176, "loss": 0.8445, "step": 84190 }, { "epoch": 1.478256289611826, "grad_norm": 0.04907243412242144, "learning_rate": 0.00016379941653258342, "loss": 0.8564, "step": 84200 }, { "epoch": 1.4784318544918276, "grad_norm": 0.0654915003154788, "learning_rate": 0.0001637908014804844, "loss": 0.856, "step": 84210 }, { "epoch": 1.4786074193718288, "grad_norm": 0.06813303181921693, "learning_rate": 0.00016378218563281396, "loss": 0.8474, "step": 84220 }, { "epoch": 1.4787829842518303, "grad_norm": 0.06940234956850423, "learning_rate": 0.00016377356898968114, "loss": 0.8457, "step": 84230 }, { "epoch": 1.4789585491318316, "grad_norm": 0.05506172187636441, "learning_rate": 0.00016376495155119523, "loss": 0.851, "step": 84240 }, { "epoch": 1.479134114011833, "grad_norm": 0.04765432518736365, "learning_rate": 0.0001637563333174653, "loss": 0.8479, "step": 84250 }, { "epoch": 1.4793096788918345, "grad_norm": 0.05123380366558768, "learning_rate": 0.00016374771428860063, "loss": 0.8467, "step": 84260 }, { "epoch": 1.479485243771836, "grad_norm": 0.061235502163980896, "learning_rate": 0.00016373909446471037, "loss": 0.8531, "step": 84270 }, { "epoch": 1.4796608086518372, "grad_norm": 0.07220142004407613, "learning_rate": 0.0001637304738459038, "loss": 0.8458, "step": 84280 }, { "epoch": 1.4798363735318387, "grad_norm": 0.054361561597827505, "learning_rate": 0.0001637218524322901, "loss": 0.8625, "step": 84290 }, { "epoch": 1.48001193841184, "grad_norm": 0.07592914107142865, "learning_rate": 0.00016371323022397845, "loss": 0.8459, "step": 84300 }, { "epoch": 1.4801875032918415, "grad_norm": 0.0774039235809577, "learning_rate": 0.0001637046072210782, "loss": 0.8469, "step": 84310 }, { "epoch": 1.480363068171843, "grad_norm": 0.0779929985513404, "learning_rate": 0.00016369598342369856, "loss": 0.8433, "step": 84320 }, { "epoch": 1.4805386330518444, "grad_norm": 0.06173047184112846, "learning_rate": 0.00016368735883194875, "loss": 0.8537, "step": 84330 }, { "epoch": 1.4807141979318457, "grad_norm": 0.059739375694670416, "learning_rate": 0.00016367873344593813, "loss": 0.8565, "step": 84340 }, { "epoch": 1.4808897628118471, "grad_norm": 0.056878831431495074, "learning_rate": 0.00016367010726577594, "loss": 0.8537, "step": 84350 }, { "epoch": 1.4810653276918484, "grad_norm": 0.06957267565009297, "learning_rate": 0.0001636614802915715, "loss": 0.839, "step": 84360 }, { "epoch": 1.4812408925718499, "grad_norm": 0.05472944689439533, "learning_rate": 0.0001636528525234341, "loss": 0.8511, "step": 84370 }, { "epoch": 1.4814164574518514, "grad_norm": 0.068115991335367, "learning_rate": 0.000163644223961473, "loss": 0.848, "step": 84380 }, { "epoch": 1.4815920223318528, "grad_norm": 0.0830855554778225, "learning_rate": 0.0001636355946057976, "loss": 0.8452, "step": 84390 }, { "epoch": 1.481767587211854, "grad_norm": 0.07414770887781472, "learning_rate": 0.00016362696445651722, "loss": 0.8539, "step": 84400 }, { "epoch": 1.4819431520918556, "grad_norm": 0.07106414004090908, "learning_rate": 0.00016361833351374118, "loss": 0.841, "step": 84410 }, { "epoch": 1.4821187169718568, "grad_norm": 0.046989133966727666, "learning_rate": 0.00016360970177757886, "loss": 0.8501, "step": 84420 }, { "epoch": 1.4822942818518583, "grad_norm": 0.09925399749235998, "learning_rate": 0.00016360106924813965, "loss": 0.8456, "step": 84430 }, { "epoch": 1.4824698467318598, "grad_norm": 0.05512724387462179, "learning_rate": 0.0001635924359255329, "loss": 0.8495, "step": 84440 }, { "epoch": 1.4826454116118613, "grad_norm": 0.056717313275357804, "learning_rate": 0.0001635838018098679, "loss": 0.8394, "step": 84450 }, { "epoch": 1.4828209764918625, "grad_norm": 0.06828312676328038, "learning_rate": 0.00016357516690125422, "loss": 0.8453, "step": 84460 }, { "epoch": 1.482996541371864, "grad_norm": 0.05449147759917187, "learning_rate": 0.00016356653119980116, "loss": 0.8439, "step": 84470 }, { "epoch": 1.4831721062518652, "grad_norm": 0.06374790294417038, "learning_rate": 0.00016355789470561818, "loss": 0.8482, "step": 84480 }, { "epoch": 1.4833476711318667, "grad_norm": 0.06231291889742601, "learning_rate": 0.00016354925741881465, "loss": 0.8474, "step": 84490 }, { "epoch": 1.4835232360118682, "grad_norm": 0.05718750085956236, "learning_rate": 0.00016354061933950004, "loss": 0.8464, "step": 84500 }, { "epoch": 1.4836988008918697, "grad_norm": 0.056667141868149105, "learning_rate": 0.00016353198046778383, "loss": 0.862, "step": 84510 }, { "epoch": 1.483874365771871, "grad_norm": 0.05767194199882116, "learning_rate": 0.00016352334080377542, "loss": 0.8506, "step": 84520 }, { "epoch": 1.4840499306518724, "grad_norm": 0.06741602243429867, "learning_rate": 0.0001635147003475843, "loss": 0.8533, "step": 84530 }, { "epoch": 1.4842254955318739, "grad_norm": 0.0719872704396953, "learning_rate": 0.00016350605909931995, "loss": 0.8474, "step": 84540 }, { "epoch": 1.4844010604118751, "grad_norm": 0.059957767920791064, "learning_rate": 0.00016349741705909188, "loss": 0.8474, "step": 84550 }, { "epoch": 1.4845766252918766, "grad_norm": 0.05238885083326305, "learning_rate": 0.00016348877422700953, "loss": 0.8471, "step": 84560 }, { "epoch": 1.484752190171878, "grad_norm": 0.052659975055194784, "learning_rate": 0.0001634801306031824, "loss": 0.8466, "step": 84570 }, { "epoch": 1.4849277550518794, "grad_norm": 0.06367777247355702, "learning_rate": 0.0001634714861877201, "loss": 0.8494, "step": 84580 }, { "epoch": 1.4851033199318808, "grad_norm": 0.06797520074457872, "learning_rate": 0.0001634628409807321, "loss": 0.8495, "step": 84590 }, { "epoch": 1.4852788848118823, "grad_norm": 0.06489518239529428, "learning_rate": 0.0001634541949823279, "loss": 0.8539, "step": 84600 }, { "epoch": 1.4854544496918836, "grad_norm": 0.059280217142603016, "learning_rate": 0.00016344554819261708, "loss": 0.8478, "step": 84610 }, { "epoch": 1.485630014571885, "grad_norm": 0.054951523024527306, "learning_rate": 0.0001634369006117092, "loss": 0.8494, "step": 84620 }, { "epoch": 1.4858055794518865, "grad_norm": 0.0509198352793718, "learning_rate": 0.00016342825223971384, "loss": 0.8461, "step": 84630 }, { "epoch": 1.4859811443318878, "grad_norm": 0.08097559214355181, "learning_rate": 0.00016341960307674055, "loss": 0.853, "step": 84640 }, { "epoch": 1.4861567092118892, "grad_norm": 0.09073657388524083, "learning_rate": 0.00016341095312289895, "loss": 0.8465, "step": 84650 }, { "epoch": 1.4863322740918907, "grad_norm": 0.05597060869306052, "learning_rate": 0.00016340230237829855, "loss": 0.8441, "step": 84660 }, { "epoch": 1.486507838971892, "grad_norm": 0.06405056900285544, "learning_rate": 0.00016339365084304906, "loss": 0.8489, "step": 84670 }, { "epoch": 1.4866834038518935, "grad_norm": 0.07507183327286537, "learning_rate": 0.00016338499851726007, "loss": 0.8515, "step": 84680 }, { "epoch": 1.486858968731895, "grad_norm": 0.06186822543453404, "learning_rate": 0.0001633763454010412, "loss": 0.8492, "step": 84690 }, { "epoch": 1.4870345336118964, "grad_norm": 0.06728424360050769, "learning_rate": 0.00016336769149450205, "loss": 0.8516, "step": 84700 }, { "epoch": 1.4872100984918977, "grad_norm": 0.06313058988266618, "learning_rate": 0.00016335903679775228, "loss": 0.8546, "step": 84710 }, { "epoch": 1.4873856633718991, "grad_norm": 0.07012106372395695, "learning_rate": 0.0001633503813109016, "loss": 0.8389, "step": 84720 }, { "epoch": 1.4875612282519004, "grad_norm": 0.04965338273055245, "learning_rate": 0.00016334172503405963, "loss": 0.8503, "step": 84730 }, { "epoch": 1.4877367931319019, "grad_norm": 0.06594092120578991, "learning_rate": 0.00016333306796733604, "loss": 0.8502, "step": 84740 }, { "epoch": 1.4879123580119034, "grad_norm": 0.07733255419851398, "learning_rate": 0.00016332441011084053, "loss": 0.8471, "step": 84750 }, { "epoch": 1.4880879228919048, "grad_norm": 0.07144134479436538, "learning_rate": 0.00016331575146468283, "loss": 0.8485, "step": 84760 }, { "epoch": 1.488263487771906, "grad_norm": 0.061377113130857266, "learning_rate": 0.0001633070920289726, "loss": 0.8477, "step": 84770 }, { "epoch": 1.4884390526519076, "grad_norm": 0.058492717189140495, "learning_rate": 0.0001632984318038196, "loss": 0.8426, "step": 84780 }, { "epoch": 1.4886146175319088, "grad_norm": 0.06249317094658534, "learning_rate": 0.00016328977078933348, "loss": 0.8537, "step": 84790 }, { "epoch": 1.4887901824119103, "grad_norm": 0.08350555665413405, "learning_rate": 0.00016328110898562403, "loss": 0.846, "step": 84800 }, { "epoch": 1.4889657472919118, "grad_norm": 0.06239562239804185, "learning_rate": 0.000163272446392801, "loss": 0.8494, "step": 84810 }, { "epoch": 1.4891413121719133, "grad_norm": 0.07620392853966715, "learning_rate": 0.00016326378301097417, "loss": 0.8525, "step": 84820 }, { "epoch": 1.4893168770519145, "grad_norm": 0.10732709929486454, "learning_rate": 0.00016325511884025327, "loss": 0.8441, "step": 84830 }, { "epoch": 1.489492441931916, "grad_norm": 0.06668229758336931, "learning_rate": 0.00016324645388074804, "loss": 0.8427, "step": 84840 }, { "epoch": 1.4896680068119172, "grad_norm": 0.049112831668743404, "learning_rate": 0.00016323778813256835, "loss": 0.8565, "step": 84850 }, { "epoch": 1.4898435716919187, "grad_norm": 0.06384265954601573, "learning_rate": 0.00016322912159582396, "loss": 0.8481, "step": 84860 }, { "epoch": 1.4900191365719202, "grad_norm": 0.06779440427829297, "learning_rate": 0.0001632204542706247, "loss": 0.8531, "step": 84870 }, { "epoch": 1.4901947014519217, "grad_norm": 0.06094112196354802, "learning_rate": 0.0001632117861570803, "loss": 0.8518, "step": 84880 }, { "epoch": 1.490370266331923, "grad_norm": 0.06988054835496674, "learning_rate": 0.00016320311725530063, "loss": 0.8492, "step": 84890 }, { "epoch": 1.4905458312119244, "grad_norm": 0.050223239474612005, "learning_rate": 0.0001631944475653956, "loss": 0.8597, "step": 84900 }, { "epoch": 1.4907213960919257, "grad_norm": 0.05936625026763667, "learning_rate": 0.00016318577708747495, "loss": 0.8474, "step": 84910 }, { "epoch": 1.4908969609719271, "grad_norm": 0.0718880486520171, "learning_rate": 0.00016317710582164864, "loss": 0.8583, "step": 84920 }, { "epoch": 1.4910725258519286, "grad_norm": 0.05436058980299165, "learning_rate": 0.00016316843376802643, "loss": 0.8502, "step": 84930 }, { "epoch": 1.49124809073193, "grad_norm": 0.04439330237700216, "learning_rate": 0.00016315976092671832, "loss": 0.8492, "step": 84940 }, { "epoch": 1.4914236556119314, "grad_norm": 0.06551722637677, "learning_rate": 0.00016315108729783408, "loss": 0.8541, "step": 84950 }, { "epoch": 1.4915992204919328, "grad_norm": 0.07391160409925793, "learning_rate": 0.00016314241288148366, "loss": 0.8503, "step": 84960 }, { "epoch": 1.491774785371934, "grad_norm": 0.051161447234192026, "learning_rate": 0.00016313373767777695, "loss": 0.849, "step": 84970 }, { "epoch": 1.4919503502519356, "grad_norm": 0.048684300740799435, "learning_rate": 0.00016312506168682386, "loss": 0.8508, "step": 84980 }, { "epoch": 1.492125915131937, "grad_norm": 0.051970921177277994, "learning_rate": 0.00016311638490873438, "loss": 0.8459, "step": 84990 }, { "epoch": 1.4923014800119385, "grad_norm": 0.06270977106599583, "learning_rate": 0.00016310770734361837, "loss": 0.8394, "step": 85000 }, { "epoch": 1.4924770448919398, "grad_norm": 0.0446748467797246, "learning_rate": 0.0001630990289915858, "loss": 0.8476, "step": 85010 }, { "epoch": 1.4926526097719413, "grad_norm": 0.059960838755978234, "learning_rate": 0.00016309034985274667, "loss": 0.8416, "step": 85020 }, { "epoch": 1.4928281746519425, "grad_norm": 0.0576482748976259, "learning_rate": 0.00016308166992721088, "loss": 0.8465, "step": 85030 }, { "epoch": 1.493003739531944, "grad_norm": 0.06330609572854133, "learning_rate": 0.00016307298921508842, "loss": 0.8537, "step": 85040 }, { "epoch": 1.4931793044119455, "grad_norm": 0.058125203207492544, "learning_rate": 0.00016306430771648932, "loss": 0.8512, "step": 85050 }, { "epoch": 1.493354869291947, "grad_norm": 0.056121056513863066, "learning_rate": 0.0001630556254315235, "loss": 0.8488, "step": 85060 }, { "epoch": 1.4935304341719482, "grad_norm": 0.07989557970538212, "learning_rate": 0.00016304694236030104, "loss": 0.8519, "step": 85070 }, { "epoch": 1.4937059990519497, "grad_norm": 0.05415180253555022, "learning_rate": 0.00016303825850293193, "loss": 0.848, "step": 85080 }, { "epoch": 1.493881563931951, "grad_norm": 0.05996008738061254, "learning_rate": 0.0001630295738595262, "loss": 0.8514, "step": 85090 }, { "epoch": 1.4940571288119524, "grad_norm": 0.056911680979165186, "learning_rate": 0.00016302088843019388, "loss": 0.8481, "step": 85100 }, { "epoch": 1.4942326936919539, "grad_norm": 0.04945714197517774, "learning_rate": 0.00016301220221504503, "loss": 0.8485, "step": 85110 }, { "epoch": 1.4944082585719554, "grad_norm": 0.04714749303159526, "learning_rate": 0.0001630035152141897, "loss": 0.8516, "step": 85120 }, { "epoch": 1.4945838234519566, "grad_norm": 0.052296920888769305, "learning_rate": 0.00016299482742773786, "loss": 0.8538, "step": 85130 }, { "epoch": 1.494759388331958, "grad_norm": 0.05999899599214665, "learning_rate": 0.0001629861388557998, "loss": 0.8538, "step": 85140 }, { "epoch": 1.4949349532119593, "grad_norm": 0.06745093610033026, "learning_rate": 0.00016297744949848538, "loss": 0.8539, "step": 85150 }, { "epoch": 1.4951105180919608, "grad_norm": 0.048370962664320345, "learning_rate": 0.00016296875935590489, "loss": 0.8484, "step": 85160 }, { "epoch": 1.4952860829719623, "grad_norm": 0.06551952167915291, "learning_rate": 0.00016296006842816824, "loss": 0.8452, "step": 85170 }, { "epoch": 1.4954616478519638, "grad_norm": 0.07825809564141771, "learning_rate": 0.00016295137671538568, "loss": 0.8478, "step": 85180 }, { "epoch": 1.495637212731965, "grad_norm": 0.0695792884829635, "learning_rate": 0.00016294268421766737, "loss": 0.8501, "step": 85190 }, { "epoch": 1.4958127776119665, "grad_norm": 0.048016825503609646, "learning_rate": 0.00016293399093512333, "loss": 0.8514, "step": 85200 }, { "epoch": 1.4959883424919678, "grad_norm": 0.05875486305176089, "learning_rate": 0.00016292529686786378, "loss": 0.8484, "step": 85210 }, { "epoch": 1.4961639073719692, "grad_norm": 0.09267098532789439, "learning_rate": 0.0001629166020159988, "loss": 0.8477, "step": 85220 }, { "epoch": 1.4963394722519707, "grad_norm": 0.06733592745807193, "learning_rate": 0.00016290790637963864, "loss": 0.8546, "step": 85230 }, { "epoch": 1.4965150371319722, "grad_norm": 0.04724825306223196, "learning_rate": 0.00016289920995889344, "loss": 0.8445, "step": 85240 }, { "epoch": 1.4966906020119735, "grad_norm": 0.08620257709659636, "learning_rate": 0.00016289051275387339, "loss": 0.8515, "step": 85250 }, { "epoch": 1.496866166891975, "grad_norm": 0.0554718147362301, "learning_rate": 0.0001628818147646887, "loss": 0.8378, "step": 85260 }, { "epoch": 1.4970417317719764, "grad_norm": 0.07694900641142244, "learning_rate": 0.00016287311599144953, "loss": 0.8486, "step": 85270 }, { "epoch": 1.4972172966519777, "grad_norm": 0.05007222178657658, "learning_rate": 0.00016286441643426614, "loss": 0.8437, "step": 85280 }, { "epoch": 1.4973928615319791, "grad_norm": 0.07378223069001816, "learning_rate": 0.00016285571609324872, "loss": 0.8514, "step": 85290 }, { "epoch": 1.4975684264119806, "grad_norm": 0.05704071768024681, "learning_rate": 0.0001628470149685075, "loss": 0.849, "step": 85300 }, { "epoch": 1.4977439912919819, "grad_norm": 0.046743409754910474, "learning_rate": 0.0001628383130601528, "loss": 0.8459, "step": 85310 }, { "epoch": 1.4979195561719834, "grad_norm": 0.06704876370401013, "learning_rate": 0.0001628296103682948, "loss": 0.8519, "step": 85320 }, { "epoch": 1.4980951210519848, "grad_norm": 0.044754194361982656, "learning_rate": 0.00016282090689304378, "loss": 0.8463, "step": 85330 }, { "epoch": 1.498270685931986, "grad_norm": 0.05755186379176119, "learning_rate": 0.00016281220263451003, "loss": 0.8381, "step": 85340 }, { "epoch": 1.4984462508119876, "grad_norm": 0.08271502509191732, "learning_rate": 0.00016280349759280381, "loss": 0.8431, "step": 85350 }, { "epoch": 1.498621815691989, "grad_norm": 0.07955894807724696, "learning_rate": 0.00016279479176803545, "loss": 0.8499, "step": 85360 }, { "epoch": 1.4987973805719903, "grad_norm": 0.05073220190457187, "learning_rate": 0.00016278608516031524, "loss": 0.8396, "step": 85370 }, { "epoch": 1.4989729454519918, "grad_norm": 0.055461504334839436, "learning_rate": 0.0001627773777697535, "loss": 0.8519, "step": 85380 }, { "epoch": 1.4991485103319933, "grad_norm": 0.051768127213665756, "learning_rate": 0.00016276866959646048, "loss": 0.8521, "step": 85390 }, { "epoch": 1.4993240752119945, "grad_norm": 0.06294595064398639, "learning_rate": 0.00016275996064054663, "loss": 0.8415, "step": 85400 }, { "epoch": 1.499499640091996, "grad_norm": 0.05901077406401167, "learning_rate": 0.00016275125090212225, "loss": 0.851, "step": 85410 }, { "epoch": 1.4996752049719975, "grad_norm": 0.06391734006111374, "learning_rate": 0.00016274254038129766, "loss": 0.8512, "step": 85420 }, { "epoch": 1.499850769851999, "grad_norm": 0.05510738750434037, "learning_rate": 0.00016273382907818325, "loss": 0.8502, "step": 85430 }, { "epoch": 1.5000263347320002, "grad_norm": 0.050577122247998936, "learning_rate": 0.00016272511699288943, "loss": 0.8502, "step": 85440 }, { "epoch": 1.5002018996120015, "grad_norm": 0.0471238099165864, "learning_rate": 0.00016271640412552653, "loss": 0.8498, "step": 85450 }, { "epoch": 1.500377464492003, "grad_norm": 0.06307486157706155, "learning_rate": 0.00016270769047620497, "loss": 0.8532, "step": 85460 }, { "epoch": 1.5005530293720044, "grad_norm": 0.0635443781931037, "learning_rate": 0.0001626989760450351, "loss": 0.848, "step": 85470 }, { "epoch": 1.5007285942520059, "grad_norm": 0.09380020895070419, "learning_rate": 0.0001626902608321274, "loss": 0.846, "step": 85480 }, { "epoch": 1.5009041591320074, "grad_norm": 0.0647225702036555, "learning_rate": 0.00016268154483759228, "loss": 0.849, "step": 85490 }, { "epoch": 1.5010797240120086, "grad_norm": 0.0682100856311433, "learning_rate": 0.0001626728280615402, "loss": 0.8537, "step": 85500 }, { "epoch": 1.5012552888920099, "grad_norm": 0.052472152480540414, "learning_rate": 0.0001626641105040815, "loss": 0.8517, "step": 85510 }, { "epoch": 1.5014308537720114, "grad_norm": 0.06232311727949744, "learning_rate": 0.00016265539216532674, "loss": 0.8485, "step": 85520 }, { "epoch": 1.5016064186520128, "grad_norm": 0.061603883928408106, "learning_rate": 0.00016264667304538637, "loss": 0.8445, "step": 85530 }, { "epoch": 1.5017819835320143, "grad_norm": 0.05745857855712221, "learning_rate": 0.0001626379531443708, "loss": 0.8476, "step": 85540 }, { "epoch": 1.5019575484120158, "grad_norm": 0.079129672779203, "learning_rate": 0.00016262923246239058, "loss": 0.8466, "step": 85550 }, { "epoch": 1.502133113292017, "grad_norm": 0.0501990732208953, "learning_rate": 0.00016262051099955616, "loss": 0.8465, "step": 85560 }, { "epoch": 1.5023086781720185, "grad_norm": 0.07440203587616415, "learning_rate": 0.00016261178875597804, "loss": 0.8441, "step": 85570 }, { "epoch": 1.5024842430520198, "grad_norm": 0.07068421870261067, "learning_rate": 0.00016260306573176678, "loss": 0.8483, "step": 85580 }, { "epoch": 1.5026598079320213, "grad_norm": 0.06749126607644819, "learning_rate": 0.0001625943419270328, "loss": 0.8481, "step": 85590 }, { "epoch": 1.5028353728120227, "grad_norm": 0.05967033929803527, "learning_rate": 0.0001625856173418868, "loss": 0.8493, "step": 85600 }, { "epoch": 1.5030109376920242, "grad_norm": 0.05058458007127856, "learning_rate": 0.00016257689197643916, "loss": 0.8459, "step": 85610 }, { "epoch": 1.5031865025720255, "grad_norm": 0.0523622548065938, "learning_rate": 0.00016256816583080054, "loss": 0.8401, "step": 85620 }, { "epoch": 1.503362067452027, "grad_norm": 0.07579631732221498, "learning_rate": 0.00016255943890508144, "loss": 0.849, "step": 85630 }, { "epoch": 1.5035376323320282, "grad_norm": 0.06118679451780421, "learning_rate": 0.00016255071119939242, "loss": 0.848, "step": 85640 }, { "epoch": 1.5037131972120297, "grad_norm": 0.06358814472091377, "learning_rate": 0.00016254198271384413, "loss": 0.8462, "step": 85650 }, { "epoch": 1.5038887620920312, "grad_norm": 0.05235680101125393, "learning_rate": 0.0001625332534485471, "loss": 0.8483, "step": 85660 }, { "epoch": 1.5040643269720326, "grad_norm": 0.051116107672444254, "learning_rate": 0.000162524523403612, "loss": 0.8493, "step": 85670 }, { "epoch": 1.5042398918520339, "grad_norm": 0.05706348774239943, "learning_rate": 0.00016251579257914935, "loss": 0.8504, "step": 85680 }, { "epoch": 1.5044154567320354, "grad_norm": 0.06283099113551988, "learning_rate": 0.00016250706097526983, "loss": 0.8468, "step": 85690 }, { "epoch": 1.5045910216120366, "grad_norm": 0.06785363780458141, "learning_rate": 0.00016249832859208409, "loss": 0.8433, "step": 85700 }, { "epoch": 1.504766586492038, "grad_norm": 0.05313805933061661, "learning_rate": 0.0001624895954297027, "loss": 0.8543, "step": 85710 }, { "epoch": 1.5049421513720396, "grad_norm": 0.07005367376119512, "learning_rate": 0.0001624808614882364, "loss": 0.8519, "step": 85720 }, { "epoch": 1.505117716252041, "grad_norm": 0.06856601024398878, "learning_rate": 0.00016247212676779576, "loss": 0.8459, "step": 85730 }, { "epoch": 1.5052932811320423, "grad_norm": 0.07788595400484398, "learning_rate": 0.00016246339126849152, "loss": 0.8515, "step": 85740 }, { "epoch": 1.5054688460120438, "grad_norm": 0.06295773281039757, "learning_rate": 0.0001624546549904343, "loss": 0.8484, "step": 85750 }, { "epoch": 1.505644410892045, "grad_norm": 0.05527858201053732, "learning_rate": 0.00016244591793373482, "loss": 0.8466, "step": 85760 }, { "epoch": 1.5058199757720465, "grad_norm": 0.060878254649846024, "learning_rate": 0.00016243718009850385, "loss": 0.8487, "step": 85770 }, { "epoch": 1.505995540652048, "grad_norm": 0.07290879917452907, "learning_rate": 0.00016242844148485196, "loss": 0.8526, "step": 85780 }, { "epoch": 1.5061711055320495, "grad_norm": 0.05875182632784148, "learning_rate": 0.00016241970209288997, "loss": 0.8595, "step": 85790 }, { "epoch": 1.5063466704120507, "grad_norm": 0.06309584002727202, "learning_rate": 0.00016241096192272863, "loss": 0.8475, "step": 85800 }, { "epoch": 1.5065222352920522, "grad_norm": 0.0548415701823207, "learning_rate": 0.00016240222097447854, "loss": 0.8532, "step": 85810 }, { "epoch": 1.5066978001720535, "grad_norm": 0.056276370027825796, "learning_rate": 0.0001623934792482506, "loss": 0.8416, "step": 85820 }, { "epoch": 1.506873365052055, "grad_norm": 0.04577840504585291, "learning_rate": 0.00016238473674415552, "loss": 0.8614, "step": 85830 }, { "epoch": 1.5070489299320564, "grad_norm": 0.06231267807697012, "learning_rate": 0.00016237599346230404, "loss": 0.8556, "step": 85840 }, { "epoch": 1.507224494812058, "grad_norm": 0.048476374692895806, "learning_rate": 0.00016236724940280696, "loss": 0.8488, "step": 85850 }, { "epoch": 1.5074000596920591, "grad_norm": 0.059678883040279615, "learning_rate": 0.00016235850456577507, "loss": 0.847, "step": 85860 }, { "epoch": 1.5075756245720606, "grad_norm": 0.06944737800292897, "learning_rate": 0.0001623497589513192, "loss": 0.8587, "step": 85870 }, { "epoch": 1.5077511894520619, "grad_norm": 0.08266394824651445, "learning_rate": 0.0001623410125595501, "loss": 0.8474, "step": 85880 }, { "epoch": 1.5079267543320634, "grad_norm": 0.04732012313363443, "learning_rate": 0.0001623322653905786, "loss": 0.8508, "step": 85890 }, { "epoch": 1.5081023192120648, "grad_norm": 0.058003046854311645, "learning_rate": 0.00016232351744451554, "loss": 0.8496, "step": 85900 }, { "epoch": 1.5082778840920663, "grad_norm": 0.06727420650216087, "learning_rate": 0.00016231476872147183, "loss": 0.8465, "step": 85910 }, { "epoch": 1.5084534489720678, "grad_norm": 0.048054441113197455, "learning_rate": 0.00016230601922155816, "loss": 0.847, "step": 85920 }, { "epoch": 1.508629013852069, "grad_norm": 0.05688285350103343, "learning_rate": 0.0001622972689448855, "loss": 0.8478, "step": 85930 }, { "epoch": 1.5088045787320703, "grad_norm": 0.048554361023743664, "learning_rate": 0.00016228851789156472, "loss": 0.8463, "step": 85940 }, { "epoch": 1.5089801436120718, "grad_norm": 0.05489045583231937, "learning_rate": 0.00016227976606170667, "loss": 0.8473, "step": 85950 }, { "epoch": 1.5091557084920733, "grad_norm": 0.05296501137175214, "learning_rate": 0.00016227101345542223, "loss": 0.8544, "step": 85960 }, { "epoch": 1.5093312733720747, "grad_norm": 0.055868744061980405, "learning_rate": 0.0001622622600728223, "loss": 0.847, "step": 85970 }, { "epoch": 1.5095068382520762, "grad_norm": 0.0805085521443216, "learning_rate": 0.0001622535059140178, "loss": 0.8497, "step": 85980 }, { "epoch": 1.5096824031320775, "grad_norm": 0.06327830308869412, "learning_rate": 0.00016224475097911966, "loss": 0.8441, "step": 85990 }, { "epoch": 1.5098579680120787, "grad_norm": 0.053776983724978696, "learning_rate": 0.00016223599526823875, "loss": 0.841, "step": 86000 }, { "epoch": 1.5100335328920802, "grad_norm": 0.06253812142781577, "learning_rate": 0.0001622272387814861, "loss": 0.8491, "step": 86010 }, { "epoch": 1.5102090977720817, "grad_norm": 0.05938730128778984, "learning_rate": 0.00016221848151897253, "loss": 0.8485, "step": 86020 }, { "epoch": 1.5103846626520832, "grad_norm": 0.06419680886680385, "learning_rate": 0.0001622097234808091, "loss": 0.8423, "step": 86030 }, { "epoch": 1.5105602275320846, "grad_norm": 0.06477009006894974, "learning_rate": 0.00016220096466710676, "loss": 0.8442, "step": 86040 }, { "epoch": 1.5107357924120859, "grad_norm": 0.05908773950606371, "learning_rate": 0.00016219220507797646, "loss": 0.8497, "step": 86050 }, { "epoch": 1.5109113572920871, "grad_norm": 0.057562583469793295, "learning_rate": 0.0001621834447135292, "loss": 0.8466, "step": 86060 }, { "epoch": 1.5110869221720886, "grad_norm": 0.053319958319703015, "learning_rate": 0.00016217468357387595, "loss": 0.8524, "step": 86070 }, { "epoch": 1.51126248705209, "grad_norm": 0.06138819568067083, "learning_rate": 0.00016216592165912776, "loss": 0.8533, "step": 86080 }, { "epoch": 1.5114380519320916, "grad_norm": 0.06378648442746798, "learning_rate": 0.00016215715896939558, "loss": 0.8519, "step": 86090 }, { "epoch": 1.511613616812093, "grad_norm": 0.0528837104798008, "learning_rate": 0.00016214839550479052, "loss": 0.8464, "step": 86100 }, { "epoch": 1.5117891816920943, "grad_norm": 0.05824481337113094, "learning_rate": 0.00016213963126542353, "loss": 0.8455, "step": 86110 }, { "epoch": 1.5119647465720956, "grad_norm": 0.0677577243028919, "learning_rate": 0.0001621308662514057, "loss": 0.8465, "step": 86120 }, { "epoch": 1.512140311452097, "grad_norm": 0.057872580437550405, "learning_rate": 0.00016212210046284811, "loss": 0.8498, "step": 86130 }, { "epoch": 1.5123158763320985, "grad_norm": 0.057625977492178126, "learning_rate": 0.00016211333389986178, "loss": 0.8566, "step": 86140 }, { "epoch": 1.5124914412121, "grad_norm": 0.04671276197760504, "learning_rate": 0.0001621045665625578, "loss": 0.8464, "step": 86150 }, { "epoch": 1.5126670060921015, "grad_norm": 0.06009961833816527, "learning_rate": 0.00016209579845104724, "loss": 0.8486, "step": 86160 }, { "epoch": 1.5128425709721027, "grad_norm": 0.04964831102771233, "learning_rate": 0.00016208702956544118, "loss": 0.85, "step": 86170 }, { "epoch": 1.513018135852104, "grad_norm": 0.053023440932321524, "learning_rate": 0.00016207825990585082, "loss": 0.8481, "step": 86180 }, { "epoch": 1.5131937007321055, "grad_norm": 0.05392135612593896, "learning_rate": 0.00016206948947238714, "loss": 0.8466, "step": 86190 }, { "epoch": 1.513369265612107, "grad_norm": 0.05058214363273187, "learning_rate": 0.00016206071826516133, "loss": 0.8582, "step": 86200 }, { "epoch": 1.5135448304921084, "grad_norm": 0.07926169676809618, "learning_rate": 0.00016205194628428452, "loss": 0.8525, "step": 86210 }, { "epoch": 1.51372039537211, "grad_norm": 0.05083987334013135, "learning_rate": 0.0001620431735298679, "loss": 0.8497, "step": 86220 }, { "epoch": 1.5138959602521112, "grad_norm": 0.057070676618394156, "learning_rate": 0.0001620344000020225, "loss": 0.8538, "step": 86230 }, { "epoch": 1.5140715251321124, "grad_norm": 0.0481101888779182, "learning_rate": 0.0001620256257008596, "loss": 0.8414, "step": 86240 }, { "epoch": 1.5142470900121139, "grad_norm": 0.0854041821862504, "learning_rate": 0.00016201685062649032, "loss": 0.8374, "step": 86250 }, { "epoch": 1.5144226548921154, "grad_norm": 0.05122869848677946, "learning_rate": 0.00016200807477902582, "loss": 0.8494, "step": 86260 }, { "epoch": 1.5145982197721168, "grad_norm": 0.06875814154082803, "learning_rate": 0.00016199929815857735, "loss": 0.8431, "step": 86270 }, { "epoch": 1.5147737846521183, "grad_norm": 0.05216343431174146, "learning_rate": 0.00016199052076525604, "loss": 0.8507, "step": 86280 }, { "epoch": 1.5149493495321196, "grad_norm": 0.06286092979801777, "learning_rate": 0.00016198174259917321, "loss": 0.845, "step": 86290 }, { "epoch": 1.515124914412121, "grad_norm": 0.058617679396980364, "learning_rate": 0.00016197296366044, "loss": 0.8457, "step": 86300 }, { "epoch": 1.5153004792921223, "grad_norm": 0.0629213830452067, "learning_rate": 0.00016196418394916762, "loss": 0.8512, "step": 86310 }, { "epoch": 1.5154760441721238, "grad_norm": 0.063616070033483, "learning_rate": 0.00016195540346546737, "loss": 0.8512, "step": 86320 }, { "epoch": 1.5156516090521253, "grad_norm": 0.062021907951642544, "learning_rate": 0.00016194662220945044, "loss": 0.8456, "step": 86330 }, { "epoch": 1.5158271739321267, "grad_norm": 0.06825838205973485, "learning_rate": 0.00016193784018122818, "loss": 0.8521, "step": 86340 }, { "epoch": 1.516002738812128, "grad_norm": 0.08942697390639488, "learning_rate": 0.00016192905738091176, "loss": 0.8443, "step": 86350 }, { "epoch": 1.5161783036921295, "grad_norm": 0.06263521244464022, "learning_rate": 0.00016192027380861255, "loss": 0.8564, "step": 86360 }, { "epoch": 1.5163538685721307, "grad_norm": 0.0745167347032606, "learning_rate": 0.00016191148946444176, "loss": 0.8444, "step": 86370 }, { "epoch": 1.5165294334521322, "grad_norm": 0.05716968926478812, "learning_rate": 0.00016190270434851073, "loss": 0.8442, "step": 86380 }, { "epoch": 1.5167049983321337, "grad_norm": 0.06705683538516093, "learning_rate": 0.0001618939184609308, "loss": 0.8497, "step": 86390 }, { "epoch": 1.5168805632121352, "grad_norm": 0.06766030002074785, "learning_rate": 0.0001618851318018132, "loss": 0.8455, "step": 86400 }, { "epoch": 1.5170561280921364, "grad_norm": 0.08243371146166309, "learning_rate": 0.00016187634437126935, "loss": 0.8453, "step": 86410 }, { "epoch": 1.517231692972138, "grad_norm": 0.05807804443216681, "learning_rate": 0.00016186755616941055, "loss": 0.8487, "step": 86420 }, { "epoch": 1.5174072578521391, "grad_norm": 0.06329721458983907, "learning_rate": 0.0001618587671963481, "loss": 0.8523, "step": 86430 }, { "epoch": 1.5175828227321406, "grad_norm": 0.061765758744956274, "learning_rate": 0.00016184997745219345, "loss": 0.8524, "step": 86440 }, { "epoch": 1.517758387612142, "grad_norm": 0.050386214792974164, "learning_rate": 0.00016184118693705793, "loss": 0.8483, "step": 86450 }, { "epoch": 1.5179339524921436, "grad_norm": 0.07514813983635174, "learning_rate": 0.00016183239565105287, "loss": 0.8524, "step": 86460 }, { "epoch": 1.5181095173721448, "grad_norm": 0.05221487742288582, "learning_rate": 0.00016182360359428972, "loss": 0.8468, "step": 86470 }, { "epoch": 1.5182850822521463, "grad_norm": 0.05974335394998764, "learning_rate": 0.00016181481076687988, "loss": 0.8488, "step": 86480 }, { "epoch": 1.5184606471321476, "grad_norm": 0.054529064795244074, "learning_rate": 0.0001618060171689347, "loss": 0.8509, "step": 86490 }, { "epoch": 1.518636212012149, "grad_norm": 0.06346294034341483, "learning_rate": 0.00016179722280056566, "loss": 0.8437, "step": 86500 }, { "epoch": 1.5188117768921505, "grad_norm": 0.05214411557545104, "learning_rate": 0.0001617884276618841, "loss": 0.843, "step": 86510 }, { "epoch": 1.518987341772152, "grad_norm": 0.047725496393188675, "learning_rate": 0.00016177963175300158, "loss": 0.8448, "step": 86520 }, { "epoch": 1.5191629066521533, "grad_norm": 0.05004892542516842, "learning_rate": 0.00016177083507402942, "loss": 0.8485, "step": 86530 }, { "epoch": 1.5193384715321547, "grad_norm": 0.05691287859981427, "learning_rate": 0.00016176203762507916, "loss": 0.8471, "step": 86540 }, { "epoch": 1.519514036412156, "grad_norm": 0.07395482278608699, "learning_rate": 0.00016175323940626222, "loss": 0.8511, "step": 86550 }, { "epoch": 1.5196896012921575, "grad_norm": 0.0564465428307387, "learning_rate": 0.00016174444041769015, "loss": 0.8486, "step": 86560 }, { "epoch": 1.519865166172159, "grad_norm": 0.08348240411103278, "learning_rate": 0.00016173564065947432, "loss": 0.8418, "step": 86570 }, { "epoch": 1.5200407310521604, "grad_norm": 0.05349076752897858, "learning_rate": 0.0001617268401317263, "loss": 0.8503, "step": 86580 }, { "epoch": 1.5202162959321617, "grad_norm": 0.08011798428548436, "learning_rate": 0.00016171803883455757, "loss": 0.8517, "step": 86590 }, { "epoch": 1.5203918608121632, "grad_norm": 0.06193740060523879, "learning_rate": 0.00016170923676807968, "loss": 0.8467, "step": 86600 }, { "epoch": 1.5205674256921644, "grad_norm": 0.05833356462329386, "learning_rate": 0.0001617004339324041, "loss": 0.8398, "step": 86610 }, { "epoch": 1.5207429905721659, "grad_norm": 0.05208778570037501, "learning_rate": 0.0001616916303276424, "loss": 0.8489, "step": 86620 }, { "epoch": 1.5209185554521674, "grad_norm": 0.08481433090438993, "learning_rate": 0.00016168282595390611, "loss": 0.8504, "step": 86630 }, { "epoch": 1.5210941203321688, "grad_norm": 0.05310315895826084, "learning_rate": 0.00016167402081130675, "loss": 0.8497, "step": 86640 }, { "epoch": 1.5212696852121703, "grad_norm": 0.05912947420059262, "learning_rate": 0.00016166521489995598, "loss": 0.8467, "step": 86650 }, { "epoch": 1.5214452500921716, "grad_norm": 0.08268700658489052, "learning_rate": 0.00016165640821996525, "loss": 0.857, "step": 86660 }, { "epoch": 1.5216208149721728, "grad_norm": 0.07699342378675848, "learning_rate": 0.00016164760077144624, "loss": 0.8507, "step": 86670 }, { "epoch": 1.5217963798521743, "grad_norm": 0.06191423168575263, "learning_rate": 0.00016163879255451052, "loss": 0.8458, "step": 86680 }, { "epoch": 1.5219719447321758, "grad_norm": 0.05799929760574704, "learning_rate": 0.00016162998356926967, "loss": 0.8483, "step": 86690 }, { "epoch": 1.5221475096121773, "grad_norm": 0.061434988788709156, "learning_rate": 0.0001616211738158353, "loss": 0.8488, "step": 86700 }, { "epoch": 1.5223230744921787, "grad_norm": 0.05771688749038098, "learning_rate": 0.000161612363294319, "loss": 0.8468, "step": 86710 }, { "epoch": 1.52249863937218, "grad_norm": 0.0734842233832023, "learning_rate": 0.0001616035520048325, "loss": 0.8509, "step": 86720 }, { "epoch": 1.5226742042521813, "grad_norm": 0.06116457811116448, "learning_rate": 0.00016159473994748733, "loss": 0.8491, "step": 86730 }, { "epoch": 1.5228497691321827, "grad_norm": 0.09351058420269774, "learning_rate": 0.00016158592712239522, "loss": 0.8464, "step": 86740 }, { "epoch": 1.5230253340121842, "grad_norm": 0.06388689818887336, "learning_rate": 0.0001615771135296678, "loss": 0.8505, "step": 86750 }, { "epoch": 1.5232008988921857, "grad_norm": 0.06550855651603729, "learning_rate": 0.00016156829916941676, "loss": 0.8534, "step": 86760 }, { "epoch": 1.5233764637721872, "grad_norm": 0.06343404774389007, "learning_rate": 0.00016155948404175373, "loss": 0.8461, "step": 86770 }, { "epoch": 1.5235520286521884, "grad_norm": 0.04935512151307696, "learning_rate": 0.00016155066814679044, "loss": 0.8517, "step": 86780 }, { "epoch": 1.5237275935321897, "grad_norm": 0.050634705054338904, "learning_rate": 0.0001615418514846386, "loss": 0.8459, "step": 86790 }, { "epoch": 1.5239031584121912, "grad_norm": 0.055821547945348306, "learning_rate": 0.00016153303405540986, "loss": 0.8535, "step": 86800 }, { "epoch": 1.5240787232921926, "grad_norm": 0.06255330923742852, "learning_rate": 0.000161524215859216, "loss": 0.8575, "step": 86810 }, { "epoch": 1.524254288172194, "grad_norm": 0.06429462059806693, "learning_rate": 0.00016151539689616872, "loss": 0.8485, "step": 86820 }, { "epoch": 1.5244298530521956, "grad_norm": 0.05726636350707398, "learning_rate": 0.00016150657716637977, "loss": 0.8523, "step": 86830 }, { "epoch": 1.5246054179321968, "grad_norm": 0.05251033215479348, "learning_rate": 0.00016149775666996093, "loss": 0.8474, "step": 86840 }, { "epoch": 1.524780982812198, "grad_norm": 0.06147636205708735, "learning_rate": 0.00016148893540702386, "loss": 0.847, "step": 86850 }, { "epoch": 1.5249565476921996, "grad_norm": 0.05120487190724601, "learning_rate": 0.0001614801133776804, "loss": 0.8428, "step": 86860 }, { "epoch": 1.525132112572201, "grad_norm": 0.04901220770076652, "learning_rate": 0.00016147129058204231, "loss": 0.8573, "step": 86870 }, { "epoch": 1.5253076774522025, "grad_norm": 0.06861281662469745, "learning_rate": 0.0001614624670202214, "loss": 0.8484, "step": 86880 }, { "epoch": 1.525483242332204, "grad_norm": 0.08977434293422712, "learning_rate": 0.00016145364269232944, "loss": 0.8447, "step": 86890 }, { "epoch": 1.5256588072122053, "grad_norm": 0.05456233004690548, "learning_rate": 0.0001614448175984782, "loss": 0.849, "step": 86900 }, { "epoch": 1.5258343720922065, "grad_norm": 0.08800706328168684, "learning_rate": 0.0001614359917387796, "loss": 0.8451, "step": 86910 }, { "epoch": 1.526009936972208, "grad_norm": 0.051605202513334204, "learning_rate": 0.00016142716511334534, "loss": 0.8517, "step": 86920 }, { "epoch": 1.5261855018522095, "grad_norm": 0.07941939359691608, "learning_rate": 0.00016141833772228737, "loss": 0.8493, "step": 86930 }, { "epoch": 1.526361066732211, "grad_norm": 0.04309566701474835, "learning_rate": 0.00016140950956571747, "loss": 0.8459, "step": 86940 }, { "epoch": 1.5265366316122124, "grad_norm": 0.08269895612275076, "learning_rate": 0.00016140068064374747, "loss": 0.8459, "step": 86950 }, { "epoch": 1.5267121964922137, "grad_norm": 0.04610401384235539, "learning_rate": 0.00016139185095648934, "loss": 0.8532, "step": 86960 }, { "epoch": 1.526887761372215, "grad_norm": 0.06401566033960888, "learning_rate": 0.00016138302050405483, "loss": 0.8451, "step": 86970 }, { "epoch": 1.5270633262522164, "grad_norm": 0.06639276073344114, "learning_rate": 0.00016137418928655587, "loss": 0.8474, "step": 86980 }, { "epoch": 1.527238891132218, "grad_norm": 0.06387142394216752, "learning_rate": 0.0001613653573041044, "loss": 0.854, "step": 86990 }, { "epoch": 1.5274144560122194, "grad_norm": 0.05232708542064106, "learning_rate": 0.00016135652455681224, "loss": 0.8553, "step": 87000 }, { "epoch": 1.5275900208922208, "grad_norm": 0.04840983355797537, "learning_rate": 0.0001613476910447914, "loss": 0.849, "step": 87010 }, { "epoch": 1.527765585772222, "grad_norm": 0.06647213265026873, "learning_rate": 0.00016133885676815373, "loss": 0.8504, "step": 87020 }, { "epoch": 1.5279411506522236, "grad_norm": 0.0580104884005136, "learning_rate": 0.00016133002172701117, "loss": 0.8544, "step": 87030 }, { "epoch": 1.5281167155322248, "grad_norm": 0.05483272549226063, "learning_rate": 0.00016132118592147567, "loss": 0.8479, "step": 87040 }, { "epoch": 1.5282922804122263, "grad_norm": 0.04904221453829947, "learning_rate": 0.00016131234935165916, "loss": 0.8387, "step": 87050 }, { "epoch": 1.5284678452922278, "grad_norm": 0.06126613812093932, "learning_rate": 0.00016130351201767369, "loss": 0.845, "step": 87060 }, { "epoch": 1.5286434101722293, "grad_norm": 0.06595228859161271, "learning_rate": 0.00016129467391963112, "loss": 0.8605, "step": 87070 }, { "epoch": 1.5288189750522305, "grad_norm": 0.05187293609306512, "learning_rate": 0.0001612858350576435, "loss": 0.855, "step": 87080 }, { "epoch": 1.528994539932232, "grad_norm": 0.08092952635345907, "learning_rate": 0.00016127699543182276, "loss": 0.856, "step": 87090 }, { "epoch": 1.5291701048122333, "grad_norm": 0.07301361950208915, "learning_rate": 0.000161268155042281, "loss": 0.8433, "step": 87100 }, { "epoch": 1.5293456696922347, "grad_norm": 0.04697799891793817, "learning_rate": 0.0001612593138891301, "loss": 0.8467, "step": 87110 }, { "epoch": 1.5295212345722362, "grad_norm": 0.0796054616909619, "learning_rate": 0.00016125047197248216, "loss": 0.849, "step": 87120 }, { "epoch": 1.5296967994522377, "grad_norm": 0.06399707729951509, "learning_rate": 0.00016124162929244922, "loss": 0.8447, "step": 87130 }, { "epoch": 1.529872364332239, "grad_norm": 0.07740700001596557, "learning_rate": 0.00016123278584914325, "loss": 0.8562, "step": 87140 }, { "epoch": 1.5300479292122404, "grad_norm": 0.0908341519632411, "learning_rate": 0.00016122394164267637, "loss": 0.8486, "step": 87150 }, { "epoch": 1.5302234940922417, "grad_norm": 0.10163251479421867, "learning_rate": 0.0001612150966731606, "loss": 0.8477, "step": 87160 }, { "epoch": 1.5303990589722432, "grad_norm": 0.09429663817832137, "learning_rate": 0.000161206250940708, "loss": 0.85, "step": 87170 }, { "epoch": 1.5305746238522446, "grad_norm": 0.06658048182586411, "learning_rate": 0.00016119740444543067, "loss": 0.8443, "step": 87180 }, { "epoch": 1.530750188732246, "grad_norm": 0.05406209280113798, "learning_rate": 0.0001611885571874407, "loss": 0.8562, "step": 87190 }, { "epoch": 1.5309257536122474, "grad_norm": 0.059237322677251916, "learning_rate": 0.0001611797091668502, "loss": 0.8458, "step": 87200 }, { "epoch": 1.5311013184922488, "grad_norm": 0.05309648243252786, "learning_rate": 0.00016117086038377118, "loss": 0.8449, "step": 87210 }, { "epoch": 1.53127688337225, "grad_norm": 0.06457631692850127, "learning_rate": 0.00016116201083831588, "loss": 0.8515, "step": 87220 }, { "epoch": 1.5314524482522516, "grad_norm": 0.09518947025961881, "learning_rate": 0.00016115316053059636, "loss": 0.8471, "step": 87230 }, { "epoch": 1.531628013132253, "grad_norm": 0.10593933165237389, "learning_rate": 0.0001611443094607248, "loss": 0.8511, "step": 87240 }, { "epoch": 1.5318035780122545, "grad_norm": 0.09908597014959801, "learning_rate": 0.00016113545762881328, "loss": 0.8418, "step": 87250 }, { "epoch": 1.5319791428922558, "grad_norm": 0.050837745246552817, "learning_rate": 0.00016112660503497396, "loss": 0.8493, "step": 87260 }, { "epoch": 1.5321547077722573, "grad_norm": 0.05955484770390563, "learning_rate": 0.00016111775167931906, "loss": 0.8596, "step": 87270 }, { "epoch": 1.5323302726522585, "grad_norm": 0.05704589416456655, "learning_rate": 0.0001611088975619607, "loss": 0.8454, "step": 87280 }, { "epoch": 1.53250583753226, "grad_norm": 0.05798373116837513, "learning_rate": 0.00016110004268301114, "loss": 0.8528, "step": 87290 }, { "epoch": 1.5326814024122615, "grad_norm": 0.05668659137455504, "learning_rate": 0.0001610911870425825, "loss": 0.8437, "step": 87300 }, { "epoch": 1.532856967292263, "grad_norm": 0.06896407700024566, "learning_rate": 0.000161082330640787, "loss": 0.8466, "step": 87310 }, { "epoch": 1.5330325321722642, "grad_norm": 0.07653411216899161, "learning_rate": 0.00016107347347773686, "loss": 0.8505, "step": 87320 }, { "epoch": 1.5332080970522657, "grad_norm": 0.07065416197429104, "learning_rate": 0.0001610646155535443, "loss": 0.8461, "step": 87330 }, { "epoch": 1.533383661932267, "grad_norm": 0.04906838958087765, "learning_rate": 0.00016105575686832152, "loss": 0.8596, "step": 87340 }, { "epoch": 1.5335592268122684, "grad_norm": 0.0896259078849688, "learning_rate": 0.00016104689742218081, "loss": 0.8552, "step": 87350 }, { "epoch": 1.53373479169227, "grad_norm": 0.0652538895072593, "learning_rate": 0.0001610380372152344, "loss": 0.8498, "step": 87360 }, { "epoch": 1.5339103565722714, "grad_norm": 0.04449124333082787, "learning_rate": 0.0001610291762475946, "loss": 0.8502, "step": 87370 }, { "epoch": 1.5340859214522728, "grad_norm": 0.07342958265951748, "learning_rate": 0.00016102031451937357, "loss": 0.8453, "step": 87380 }, { "epoch": 1.534261486332274, "grad_norm": 0.07231260951152439, "learning_rate": 0.00016101145203068366, "loss": 0.8495, "step": 87390 }, { "epoch": 1.5344370512122754, "grad_norm": 0.06410511641247607, "learning_rate": 0.00016100258878163715, "loss": 0.8488, "step": 87400 }, { "epoch": 1.5346126160922768, "grad_norm": 0.05746891951317036, "learning_rate": 0.00016099372477234637, "loss": 0.8424, "step": 87410 }, { "epoch": 1.5347881809722783, "grad_norm": 0.07055229471665345, "learning_rate": 0.0001609848600029236, "loss": 0.8421, "step": 87420 }, { "epoch": 1.5349637458522798, "grad_norm": 0.0641875069671382, "learning_rate": 0.00016097599447348113, "loss": 0.8461, "step": 87430 }, { "epoch": 1.5351393107322813, "grad_norm": 0.0672263308602931, "learning_rate": 0.0001609671281841313, "loss": 0.8498, "step": 87440 }, { "epoch": 1.5353148756122825, "grad_norm": 0.0540187387652721, "learning_rate": 0.00016095826113498652, "loss": 0.8411, "step": 87450 }, { "epoch": 1.5354904404922838, "grad_norm": 0.07837583629926312, "learning_rate": 0.00016094939332615904, "loss": 0.8471, "step": 87460 }, { "epoch": 1.5356660053722853, "grad_norm": 0.05997264869302118, "learning_rate": 0.00016094052475776127, "loss": 0.8516, "step": 87470 }, { "epoch": 1.5358415702522867, "grad_norm": 0.051542115722653346, "learning_rate": 0.00016093165542990553, "loss": 0.8514, "step": 87480 }, { "epoch": 1.5360171351322882, "grad_norm": 0.05936927701813411, "learning_rate": 0.00016092278534270428, "loss": 0.8498, "step": 87490 }, { "epoch": 1.5361927000122897, "grad_norm": 0.053781518579854526, "learning_rate": 0.00016091391449626983, "loss": 0.8404, "step": 87500 }, { "epoch": 1.536368264892291, "grad_norm": 0.07623883831972471, "learning_rate": 0.00016090504289071459, "loss": 0.8488, "step": 87510 }, { "epoch": 1.5365438297722922, "grad_norm": 0.05255936214948905, "learning_rate": 0.000160896170526151, "loss": 0.8524, "step": 87520 }, { "epoch": 1.5367193946522937, "grad_norm": 0.08313722369180819, "learning_rate": 0.00016088729740269147, "loss": 0.8522, "step": 87530 }, { "epoch": 1.5368949595322952, "grad_norm": 0.052804189343949155, "learning_rate": 0.00016087842352044838, "loss": 0.8387, "step": 87540 }, { "epoch": 1.5370705244122966, "grad_norm": 0.059734540653162936, "learning_rate": 0.00016086954887953417, "loss": 0.8458, "step": 87550 }, { "epoch": 1.5372460892922981, "grad_norm": 0.06979985560434786, "learning_rate": 0.0001608606734800613, "loss": 0.8459, "step": 87560 }, { "epoch": 1.5374216541722994, "grad_norm": 0.06211247914637952, "learning_rate": 0.00016085179732214225, "loss": 0.8446, "step": 87570 }, { "epoch": 1.5375972190523006, "grad_norm": 0.059060646566745235, "learning_rate": 0.00016084292040588946, "loss": 0.8548, "step": 87580 }, { "epoch": 1.537772783932302, "grad_norm": 0.07040032833723661, "learning_rate": 0.0001608340427314154, "loss": 0.8546, "step": 87590 }, { "epoch": 1.5379483488123036, "grad_norm": 0.04967446449371587, "learning_rate": 0.00016082516429883253, "loss": 0.8519, "step": 87600 }, { "epoch": 1.538123913692305, "grad_norm": 0.0620176248655262, "learning_rate": 0.0001608162851082534, "loss": 0.8491, "step": 87610 }, { "epoch": 1.5382994785723065, "grad_norm": 0.06416613123765057, "learning_rate": 0.00016080740515979044, "loss": 0.8481, "step": 87620 }, { "epoch": 1.5384750434523078, "grad_norm": 0.0683890551241037, "learning_rate": 0.0001607985244535562, "loss": 0.8476, "step": 87630 }, { "epoch": 1.538650608332309, "grad_norm": 0.05795003118731202, "learning_rate": 0.0001607896429896632, "loss": 0.8475, "step": 87640 }, { "epoch": 1.5388261732123105, "grad_norm": 0.059713221295219596, "learning_rate": 0.00016078076076822396, "loss": 0.8475, "step": 87650 }, { "epoch": 1.539001738092312, "grad_norm": 0.054975066890178434, "learning_rate": 0.000160771877789351, "loss": 0.8543, "step": 87660 }, { "epoch": 1.5391773029723135, "grad_norm": 0.05927370398092108, "learning_rate": 0.00016076299405315693, "loss": 0.8499, "step": 87670 }, { "epoch": 1.539352867852315, "grad_norm": 0.056411845635283515, "learning_rate": 0.00016075410955975424, "loss": 0.857, "step": 87680 }, { "epoch": 1.5395284327323162, "grad_norm": 0.05807270826615168, "learning_rate": 0.00016074522430925558, "loss": 0.8454, "step": 87690 }, { "epoch": 1.5397039976123177, "grad_norm": 0.0457204141384161, "learning_rate": 0.00016073633830177345, "loss": 0.8428, "step": 87700 }, { "epoch": 1.539879562492319, "grad_norm": 0.04756695590700125, "learning_rate": 0.00016072745153742046, "loss": 0.8553, "step": 87710 }, { "epoch": 1.5400551273723204, "grad_norm": 0.07280462075296015, "learning_rate": 0.0001607185640163092, "loss": 0.8487, "step": 87720 }, { "epoch": 1.540230692252322, "grad_norm": 0.07437366113979114, "learning_rate": 0.00016070967573855234, "loss": 0.8509, "step": 87730 }, { "epoch": 1.5404062571323234, "grad_norm": 0.04774820171415767, "learning_rate": 0.0001607007867042624, "loss": 0.8526, "step": 87740 }, { "epoch": 1.5405818220123246, "grad_norm": 0.05420414238923236, "learning_rate": 0.00016069189691355205, "loss": 0.8525, "step": 87750 }, { "epoch": 1.540757386892326, "grad_norm": 0.05015751928265845, "learning_rate": 0.00016068300636653396, "loss": 0.8408, "step": 87760 }, { "epoch": 1.5409329517723274, "grad_norm": 0.05923379846443744, "learning_rate": 0.00016067411506332072, "loss": 0.8376, "step": 87770 }, { "epoch": 1.5411085166523288, "grad_norm": 0.05461698614414783, "learning_rate": 0.000160665223004025, "loss": 0.8451, "step": 87780 }, { "epoch": 1.5412840815323303, "grad_norm": 0.056349955354787234, "learning_rate": 0.0001606563301887595, "loss": 0.8392, "step": 87790 }, { "epoch": 1.5414596464123318, "grad_norm": 0.08569945676805348, "learning_rate": 0.0001606474366176368, "loss": 0.8524, "step": 87800 }, { "epoch": 1.541635211292333, "grad_norm": 0.05197683106540259, "learning_rate": 0.0001606385422907697, "loss": 0.8498, "step": 87810 }, { "epoch": 1.5418107761723345, "grad_norm": 0.05393253961275397, "learning_rate": 0.00016062964720827078, "loss": 0.8518, "step": 87820 }, { "epoch": 1.5419863410523358, "grad_norm": 0.07610877284370929, "learning_rate": 0.00016062075137025287, "loss": 0.8511, "step": 87830 }, { "epoch": 1.5421619059323373, "grad_norm": 0.050628250674311905, "learning_rate": 0.0001606118547768286, "loss": 0.8456, "step": 87840 }, { "epoch": 1.5423374708123387, "grad_norm": 0.13170136400283008, "learning_rate": 0.00016060295742811067, "loss": 0.8499, "step": 87850 }, { "epoch": 1.5425130356923402, "grad_norm": 0.04651088554056199, "learning_rate": 0.0001605940593242119, "loss": 0.8522, "step": 87860 }, { "epoch": 1.5426886005723415, "grad_norm": 0.05349352984971621, "learning_rate": 0.00016058516046524494, "loss": 0.8372, "step": 87870 }, { "epoch": 1.542864165452343, "grad_norm": 0.07119577885276931, "learning_rate": 0.0001605762608513226, "loss": 0.8492, "step": 87880 }, { "epoch": 1.5430397303323442, "grad_norm": 0.04957065353699747, "learning_rate": 0.0001605673604825576, "loss": 0.8546, "step": 87890 }, { "epoch": 1.5432152952123457, "grad_norm": 0.06799939258613708, "learning_rate": 0.0001605584593590627, "loss": 0.849, "step": 87900 }, { "epoch": 1.5433908600923472, "grad_norm": 0.06489945452482677, "learning_rate": 0.00016054955748095076, "loss": 0.8538, "step": 87910 }, { "epoch": 1.5435664249723486, "grad_norm": 0.06237649670453726, "learning_rate": 0.0001605406548483345, "loss": 0.8497, "step": 87920 }, { "epoch": 1.54374198985235, "grad_norm": 0.0788560675158259, "learning_rate": 0.00016053175146132673, "loss": 0.8488, "step": 87930 }, { "epoch": 1.5439175547323514, "grad_norm": 0.07864765688486974, "learning_rate": 0.00016052284732004024, "loss": 0.8551, "step": 87940 }, { "epoch": 1.5440931196123526, "grad_norm": 0.06374191006307688, "learning_rate": 0.00016051394242458787, "loss": 0.8443, "step": 87950 }, { "epoch": 1.544268684492354, "grad_norm": 0.05335728777712646, "learning_rate": 0.00016050503677508246, "loss": 0.848, "step": 87960 }, { "epoch": 1.5444442493723556, "grad_norm": 0.07922264242369605, "learning_rate": 0.0001604961303716368, "loss": 0.8404, "step": 87970 }, { "epoch": 1.544619814252357, "grad_norm": 0.05412170908488372, "learning_rate": 0.00016048722321436383, "loss": 0.8492, "step": 87980 }, { "epoch": 1.5447953791323583, "grad_norm": 0.05647199280411568, "learning_rate": 0.00016047831530337627, "loss": 0.8514, "step": 87990 }, { "epoch": 1.5449709440123598, "grad_norm": 0.061543795564335896, "learning_rate": 0.0001604694066387871, "loss": 0.8488, "step": 88000 }, { "epoch": 1.545146508892361, "grad_norm": 0.050694011367287774, "learning_rate": 0.00016046049722070915, "loss": 0.8501, "step": 88010 }, { "epoch": 1.5453220737723625, "grad_norm": 0.051124258563554285, "learning_rate": 0.00016045158704925525, "loss": 0.8494, "step": 88020 }, { "epoch": 1.545497638652364, "grad_norm": 0.06302862810433006, "learning_rate": 0.00016044267612453842, "loss": 0.8473, "step": 88030 }, { "epoch": 1.5456732035323655, "grad_norm": 0.06096943740106845, "learning_rate": 0.00016043376444667146, "loss": 0.8454, "step": 88040 }, { "epoch": 1.5458487684123667, "grad_norm": 0.0645368290324632, "learning_rate": 0.00016042485201576729, "loss": 0.8383, "step": 88050 }, { "epoch": 1.5460243332923682, "grad_norm": 0.049624653515132525, "learning_rate": 0.00016041593883193888, "loss": 0.8462, "step": 88060 }, { "epoch": 1.5461998981723695, "grad_norm": 0.05055230813281649, "learning_rate": 0.00016040702489529912, "loss": 0.8472, "step": 88070 }, { "epoch": 1.546375463052371, "grad_norm": 0.05367393950291271, "learning_rate": 0.000160398110205961, "loss": 0.8503, "step": 88080 }, { "epoch": 1.5465510279323724, "grad_norm": 0.07735134052896653, "learning_rate": 0.00016038919476403743, "loss": 0.8496, "step": 88090 }, { "epoch": 1.546726592812374, "grad_norm": 0.051543715312734266, "learning_rate": 0.00016038027856964143, "loss": 0.852, "step": 88100 }, { "epoch": 1.5469021576923754, "grad_norm": 0.06713413382240226, "learning_rate": 0.00016037136162288582, "loss": 0.8573, "step": 88110 }, { "epoch": 1.5470777225723766, "grad_norm": 0.06739465327836253, "learning_rate": 0.00016036244392388376, "loss": 0.844, "step": 88120 }, { "epoch": 1.547253287452378, "grad_norm": 0.0668324989970712, "learning_rate": 0.00016035352547274812, "loss": 0.8467, "step": 88130 }, { "epoch": 1.5474288523323794, "grad_norm": 0.09528290421779649, "learning_rate": 0.00016034460626959194, "loss": 0.8426, "step": 88140 }, { "epoch": 1.5476044172123808, "grad_norm": 0.08148505263461221, "learning_rate": 0.00016033568631452821, "loss": 0.845, "step": 88150 }, { "epoch": 1.5477799820923823, "grad_norm": 0.08306487545775168, "learning_rate": 0.00016032676560767, "loss": 0.8387, "step": 88160 }, { "epoch": 1.5479555469723838, "grad_norm": 0.0631228546814343, "learning_rate": 0.00016031784414913028, "loss": 0.8491, "step": 88170 }, { "epoch": 1.548131111852385, "grad_norm": 0.06170747194712506, "learning_rate": 0.00016030892193902212, "loss": 0.8579, "step": 88180 }, { "epoch": 1.5483066767323863, "grad_norm": 0.053697332745529736, "learning_rate": 0.0001602999989774585, "loss": 0.8477, "step": 88190 }, { "epoch": 1.5484822416123878, "grad_norm": 0.0553295765604524, "learning_rate": 0.0001602910752645526, "loss": 0.8519, "step": 88200 }, { "epoch": 1.5486578064923893, "grad_norm": 0.0647998212387906, "learning_rate": 0.00016028215080041738, "loss": 0.846, "step": 88210 }, { "epoch": 1.5488333713723907, "grad_norm": 0.05476234694836397, "learning_rate": 0.00016027322558516597, "loss": 0.8514, "step": 88220 }, { "epoch": 1.5490089362523922, "grad_norm": 0.06282222476014848, "learning_rate": 0.00016026429961891138, "loss": 0.855, "step": 88230 }, { "epoch": 1.5491845011323935, "grad_norm": 0.06701701338471586, "learning_rate": 0.0001602553729017668, "loss": 0.8494, "step": 88240 }, { "epoch": 1.5493600660123947, "grad_norm": 0.08155078073142208, "learning_rate": 0.00016024644543384528, "loss": 0.8481, "step": 88250 }, { "epoch": 1.5495356308923962, "grad_norm": 0.05729407581065136, "learning_rate": 0.00016023751721525994, "loss": 0.8444, "step": 88260 }, { "epoch": 1.5497111957723977, "grad_norm": 0.05783369762170022, "learning_rate": 0.00016022858824612393, "loss": 0.8434, "step": 88270 }, { "epoch": 1.5498867606523992, "grad_norm": 0.051016863323899306, "learning_rate": 0.00016021965852655028, "loss": 0.8476, "step": 88280 }, { "epoch": 1.5500623255324006, "grad_norm": 0.07662158958791614, "learning_rate": 0.0001602107280566523, "loss": 0.8476, "step": 88290 }, { "epoch": 1.550237890412402, "grad_norm": 0.0691013778827576, "learning_rate": 0.000160201796836543, "loss": 0.8516, "step": 88300 }, { "epoch": 1.5504134552924032, "grad_norm": 0.05963990022038693, "learning_rate": 0.00016019286486633556, "loss": 0.8574, "step": 88310 }, { "epoch": 1.5505890201724046, "grad_norm": 0.05430415160057141, "learning_rate": 0.00016018393214614323, "loss": 0.8429, "step": 88320 }, { "epoch": 1.550764585052406, "grad_norm": 0.05996548764599677, "learning_rate": 0.0001601749986760791, "loss": 0.8515, "step": 88330 }, { "epoch": 1.5509401499324076, "grad_norm": 0.06526205407218191, "learning_rate": 0.00016016606445625644, "loss": 0.8466, "step": 88340 }, { "epoch": 1.551115714812409, "grad_norm": 0.0712808859374712, "learning_rate": 0.0001601571294867884, "loss": 0.8466, "step": 88350 }, { "epoch": 1.5512912796924103, "grad_norm": 0.05384173280871978, "learning_rate": 0.00016014819376778812, "loss": 0.8496, "step": 88360 }, { "epoch": 1.5514668445724116, "grad_norm": 0.08317107856886348, "learning_rate": 0.00016013925729936896, "loss": 0.8514, "step": 88370 }, { "epoch": 1.551642409452413, "grad_norm": 0.05111917805532528, "learning_rate": 0.00016013032008164402, "loss": 0.8432, "step": 88380 }, { "epoch": 1.5518179743324145, "grad_norm": 0.08576891342094982, "learning_rate": 0.00016012138211472668, "loss": 0.8459, "step": 88390 }, { "epoch": 1.551993539212416, "grad_norm": 0.06495403405657989, "learning_rate": 0.00016011244339873002, "loss": 0.8493, "step": 88400 }, { "epoch": 1.5521691040924175, "grad_norm": 0.07032652576154196, "learning_rate": 0.00016010350393376742, "loss": 0.8525, "step": 88410 }, { "epoch": 1.5523446689724187, "grad_norm": 0.04925381706446803, "learning_rate": 0.0001600945637199521, "loss": 0.8388, "step": 88420 }, { "epoch": 1.5525202338524202, "grad_norm": 0.07071921778151775, "learning_rate": 0.00016008562275739727, "loss": 0.8532, "step": 88430 }, { "epoch": 1.5526957987324215, "grad_norm": 0.04921774009760984, "learning_rate": 0.00016007668104621635, "loss": 0.8489, "step": 88440 }, { "epoch": 1.552871363612423, "grad_norm": 0.05282641106717509, "learning_rate": 0.0001600677385865225, "loss": 0.852, "step": 88450 }, { "epoch": 1.5530469284924244, "grad_norm": 0.054797419822645896, "learning_rate": 0.0001600587953784291, "loss": 0.8455, "step": 88460 }, { "epoch": 1.553222493372426, "grad_norm": 0.04378570430651425, "learning_rate": 0.00016004985142204945, "loss": 0.8483, "step": 88470 }, { "epoch": 1.5533980582524272, "grad_norm": 0.05204091314603641, "learning_rate": 0.00016004090671749684, "loss": 0.8548, "step": 88480 }, { "epoch": 1.5535736231324286, "grad_norm": 0.05254989047127055, "learning_rate": 0.00016003196126488468, "loss": 0.8469, "step": 88490 }, { "epoch": 1.55374918801243, "grad_norm": 0.05012565057882266, "learning_rate": 0.00016002301506432622, "loss": 0.8443, "step": 88500 }, { "epoch": 1.5539247528924314, "grad_norm": 0.048538592089351934, "learning_rate": 0.0001600140681159349, "loss": 0.843, "step": 88510 }, { "epoch": 1.5541003177724328, "grad_norm": 0.0457394050187913, "learning_rate": 0.00016000512041982394, "loss": 0.8564, "step": 88520 }, { "epoch": 1.5542758826524343, "grad_norm": 0.06649201881095054, "learning_rate": 0.00015999617197610684, "loss": 0.852, "step": 88530 }, { "epoch": 1.5544514475324356, "grad_norm": 0.04672136897033637, "learning_rate": 0.00015998722278489695, "loss": 0.8473, "step": 88540 }, { "epoch": 1.554627012412437, "grad_norm": 0.06294784327979482, "learning_rate": 0.0001599782728463076, "loss": 0.8506, "step": 88550 }, { "epoch": 1.5548025772924383, "grad_norm": 0.051452873912756825, "learning_rate": 0.00015996932216045227, "loss": 0.8463, "step": 88560 }, { "epoch": 1.5549781421724398, "grad_norm": 0.049189441520006205, "learning_rate": 0.00015996037072744433, "loss": 0.8497, "step": 88570 }, { "epoch": 1.5551537070524413, "grad_norm": 0.0724836231786538, "learning_rate": 0.00015995141854739718, "loss": 0.8474, "step": 88580 }, { "epoch": 1.5553292719324427, "grad_norm": 0.05804144157832132, "learning_rate": 0.00015994246562042424, "loss": 0.8434, "step": 88590 }, { "epoch": 1.555504836812444, "grad_norm": 0.05003232552640343, "learning_rate": 0.00015993351194663896, "loss": 0.8426, "step": 88600 }, { "epoch": 1.5556804016924455, "grad_norm": 0.06031231487671292, "learning_rate": 0.00015992455752615484, "loss": 0.854, "step": 88610 }, { "epoch": 1.5558559665724467, "grad_norm": 0.04576508548163368, "learning_rate": 0.00015991560235908525, "loss": 0.8486, "step": 88620 }, { "epoch": 1.5560315314524482, "grad_norm": 0.06051559688133626, "learning_rate": 0.00015990664644554368, "loss": 0.8506, "step": 88630 }, { "epoch": 1.5562070963324497, "grad_norm": 0.054291799131279586, "learning_rate": 0.00015989768978564363, "loss": 0.8509, "step": 88640 }, { "epoch": 1.5563826612124512, "grad_norm": 0.08643777691916793, "learning_rate": 0.00015988873237949856, "loss": 0.848, "step": 88650 }, { "epoch": 1.5565582260924524, "grad_norm": 0.05036279806804781, "learning_rate": 0.00015987977422722194, "loss": 0.8549, "step": 88660 }, { "epoch": 1.556733790972454, "grad_norm": 0.05762000529333138, "learning_rate": 0.00015987081532892734, "loss": 0.8501, "step": 88670 }, { "epoch": 1.5569093558524552, "grad_norm": 0.06219722378045775, "learning_rate": 0.0001598618556847282, "loss": 0.8512, "step": 88680 }, { "epoch": 1.5570849207324566, "grad_norm": 0.05751065250900086, "learning_rate": 0.00015985289529473807, "loss": 0.8504, "step": 88690 }, { "epoch": 1.5572604856124581, "grad_norm": 0.07027597801459065, "learning_rate": 0.00015984393415907045, "loss": 0.8565, "step": 88700 }, { "epoch": 1.5574360504924596, "grad_norm": 0.052995549901803926, "learning_rate": 0.00015983497227783892, "loss": 0.8506, "step": 88710 }, { "epoch": 1.5576116153724608, "grad_norm": 0.062092029269798184, "learning_rate": 0.00015982600965115705, "loss": 0.8498, "step": 88720 }, { "epoch": 1.5577871802524623, "grad_norm": 0.07814759280050043, "learning_rate": 0.00015981704627913835, "loss": 0.86, "step": 88730 }, { "epoch": 1.5579627451324636, "grad_norm": 0.05685220999653638, "learning_rate": 0.00015980808216189637, "loss": 0.851, "step": 88740 }, { "epoch": 1.558138310012465, "grad_norm": 0.09110692982604841, "learning_rate": 0.00015979911729954477, "loss": 0.8511, "step": 88750 }, { "epoch": 1.5583138748924665, "grad_norm": 0.05996319216297568, "learning_rate": 0.00015979015169219704, "loss": 0.8452, "step": 88760 }, { "epoch": 1.558489439772468, "grad_norm": 0.0604021298173742, "learning_rate": 0.00015978118533996685, "loss": 0.8536, "step": 88770 }, { "epoch": 1.5586650046524693, "grad_norm": 0.06390410238208531, "learning_rate": 0.00015977221824296778, "loss": 0.8506, "step": 88780 }, { "epoch": 1.5588405695324707, "grad_norm": 0.061472288992131446, "learning_rate": 0.0001597632504013134, "loss": 0.8434, "step": 88790 }, { "epoch": 1.559016134412472, "grad_norm": 0.07439558690993833, "learning_rate": 0.0001597542818151174, "loss": 0.8446, "step": 88800 }, { "epoch": 1.5591916992924735, "grad_norm": 0.05360402589715738, "learning_rate": 0.0001597453124844934, "loss": 0.845, "step": 88810 }, { "epoch": 1.559367264172475, "grad_norm": 0.0598744398739961, "learning_rate": 0.000159736342409555, "loss": 0.8514, "step": 88820 }, { "epoch": 1.5595428290524764, "grad_norm": 0.0661249309422794, "learning_rate": 0.00015972737159041593, "loss": 0.8487, "step": 88830 }, { "epoch": 1.559718393932478, "grad_norm": 0.07636917032945023, "learning_rate": 0.00015971840002718978, "loss": 0.8436, "step": 88840 }, { "epoch": 1.5598939588124792, "grad_norm": 0.060277134665484365, "learning_rate": 0.00015970942771999027, "loss": 0.8427, "step": 88850 }, { "epoch": 1.5600695236924804, "grad_norm": 0.046329278382515875, "learning_rate": 0.00015970045466893104, "loss": 0.848, "step": 88860 }, { "epoch": 1.560245088572482, "grad_norm": 0.05969738909978531, "learning_rate": 0.00015969148087412578, "loss": 0.8429, "step": 88870 }, { "epoch": 1.5604206534524834, "grad_norm": 0.04783374187444261, "learning_rate": 0.00015968250633568821, "loss": 0.857, "step": 88880 }, { "epoch": 1.5605962183324849, "grad_norm": 0.05374600030585941, "learning_rate": 0.0001596735310537321, "loss": 0.8426, "step": 88890 }, { "epoch": 1.5607717832124863, "grad_norm": 0.05059625071216359, "learning_rate": 0.00015966455502837106, "loss": 0.8595, "step": 88900 }, { "epoch": 1.5609473480924876, "grad_norm": 0.059065205009966426, "learning_rate": 0.00015965557825971886, "loss": 0.844, "step": 88910 }, { "epoch": 1.5611229129724888, "grad_norm": 0.054870494587684636, "learning_rate": 0.00015964660074788926, "loss": 0.8436, "step": 88920 }, { "epoch": 1.5612984778524903, "grad_norm": 0.04675668622036516, "learning_rate": 0.00015963762249299597, "loss": 0.8498, "step": 88930 }, { "epoch": 1.5614740427324918, "grad_norm": 0.052069446860781625, "learning_rate": 0.0001596286434951528, "loss": 0.8507, "step": 88940 }, { "epoch": 1.5616496076124933, "grad_norm": 0.0752571607433736, "learning_rate": 0.00015961966375447345, "loss": 0.8507, "step": 88950 }, { "epoch": 1.5618251724924948, "grad_norm": 0.06489792951138974, "learning_rate": 0.00015961068327107175, "loss": 0.8514, "step": 88960 }, { "epoch": 1.562000737372496, "grad_norm": 0.09207941834341338, "learning_rate": 0.00015960170204506143, "loss": 0.8466, "step": 88970 }, { "epoch": 1.5621763022524973, "grad_norm": 0.06858110588479889, "learning_rate": 0.0001595927200765563, "loss": 0.8516, "step": 88980 }, { "epoch": 1.5623518671324987, "grad_norm": 0.07442538658884262, "learning_rate": 0.0001595837373656702, "loss": 0.8462, "step": 88990 }, { "epoch": 1.5625274320125002, "grad_norm": 0.047590865544009577, "learning_rate": 0.00015957475391251694, "loss": 0.8532, "step": 89000 }, { "epoch": 1.5627029968925017, "grad_norm": 0.04527021988690423, "learning_rate": 0.00015956576971721032, "loss": 0.849, "step": 89010 }, { "epoch": 1.5628785617725032, "grad_norm": 0.05727803923957486, "learning_rate": 0.00015955678477986418, "loss": 0.848, "step": 89020 }, { "epoch": 1.5630541266525044, "grad_norm": 0.07024344774194537, "learning_rate": 0.00015954779910059232, "loss": 0.847, "step": 89030 }, { "epoch": 1.5632296915325057, "grad_norm": 0.05419523783918599, "learning_rate": 0.00015953881267950863, "loss": 0.8458, "step": 89040 }, { "epoch": 1.5634052564125072, "grad_norm": 0.05893936786791296, "learning_rate": 0.00015952982551672697, "loss": 0.8529, "step": 89050 }, { "epoch": 1.5635808212925086, "grad_norm": 0.061816623961772434, "learning_rate": 0.00015952083761236122, "loss": 0.8474, "step": 89060 }, { "epoch": 1.5637563861725101, "grad_norm": 0.047065375943145873, "learning_rate": 0.00015951184896652524, "loss": 0.8439, "step": 89070 }, { "epoch": 1.5639319510525116, "grad_norm": 0.059529795079337505, "learning_rate": 0.0001595028595793329, "loss": 0.8474, "step": 89080 }, { "epoch": 1.5641075159325128, "grad_norm": 0.05539493935948797, "learning_rate": 0.00015949386945089813, "loss": 0.8422, "step": 89090 }, { "epoch": 1.564283080812514, "grad_norm": 0.048387218981702375, "learning_rate": 0.0001594848785813348, "loss": 0.8483, "step": 89100 }, { "epoch": 1.5644586456925156, "grad_norm": 0.05972449231083537, "learning_rate": 0.00015947588697075687, "loss": 0.8467, "step": 89110 }, { "epoch": 1.564634210572517, "grad_norm": 0.08242853930650401, "learning_rate": 0.00015946689461927827, "loss": 0.8506, "step": 89120 }, { "epoch": 1.5648097754525185, "grad_norm": 0.0691499123426812, "learning_rate": 0.00015945790152701285, "loss": 0.8512, "step": 89130 }, { "epoch": 1.56498534033252, "grad_norm": 0.06379789932670288, "learning_rate": 0.00015944890769407468, "loss": 0.85, "step": 89140 }, { "epoch": 1.5651609052125213, "grad_norm": 0.05564746835500519, "learning_rate": 0.00015943991312057762, "loss": 0.8464, "step": 89150 }, { "epoch": 1.5653364700925227, "grad_norm": 0.09475315615288553, "learning_rate": 0.00015943091780663563, "loss": 0.8497, "step": 89160 }, { "epoch": 1.565512034972524, "grad_norm": 0.08547003845546587, "learning_rate": 0.00015942192175236275, "loss": 0.8434, "step": 89170 }, { "epoch": 1.5656875998525255, "grad_norm": 0.06677338418707977, "learning_rate": 0.00015941292495787293, "loss": 0.849, "step": 89180 }, { "epoch": 1.565863164732527, "grad_norm": 0.06306045779049665, "learning_rate": 0.00015940392742328015, "loss": 0.8485, "step": 89190 }, { "epoch": 1.5660387296125284, "grad_norm": 0.05095060983567082, "learning_rate": 0.0001593949291486984, "loss": 0.8597, "step": 89200 }, { "epoch": 1.5662142944925297, "grad_norm": 0.05247095994731161, "learning_rate": 0.0001593859301342417, "loss": 0.8445, "step": 89210 }, { "epoch": 1.5663898593725312, "grad_norm": 0.05761501744974937, "learning_rate": 0.0001593769303800241, "loss": 0.8476, "step": 89220 }, { "epoch": 1.5665654242525324, "grad_norm": 0.07615885218608445, "learning_rate": 0.0001593679298861596, "loss": 0.8518, "step": 89230 }, { "epoch": 1.566740989132534, "grad_norm": 0.04910385453818074, "learning_rate": 0.00015935892865276223, "loss": 0.8526, "step": 89240 }, { "epoch": 1.5669165540125354, "grad_norm": 0.05883131024198244, "learning_rate": 0.00015934992667994608, "loss": 0.8539, "step": 89250 }, { "epoch": 1.5670921188925369, "grad_norm": 0.05811371220460152, "learning_rate": 0.00015934092396782516, "loss": 0.8498, "step": 89260 }, { "epoch": 1.5672676837725381, "grad_norm": 0.07172066859847419, "learning_rate": 0.00015933192051651354, "loss": 0.8402, "step": 89270 }, { "epoch": 1.5674432486525396, "grad_norm": 0.04694687412383926, "learning_rate": 0.0001593229163261253, "loss": 0.8441, "step": 89280 }, { "epoch": 1.5676188135325408, "grad_norm": 0.055858899026244004, "learning_rate": 0.00015931391139677455, "loss": 0.8447, "step": 89290 }, { "epoch": 1.5677943784125423, "grad_norm": 0.04296509976650514, "learning_rate": 0.00015930490572857534, "loss": 0.8465, "step": 89300 }, { "epoch": 1.5679699432925438, "grad_norm": 0.06049585235252407, "learning_rate": 0.00015929589932164184, "loss": 0.8488, "step": 89310 }, { "epoch": 1.5681455081725453, "grad_norm": 0.06827125783006885, "learning_rate": 0.00015928689217608813, "loss": 0.8499, "step": 89320 }, { "epoch": 1.5683210730525465, "grad_norm": 0.060336242316247964, "learning_rate": 0.0001592778842920283, "loss": 0.844, "step": 89330 }, { "epoch": 1.568496637932548, "grad_norm": 0.06377148726042152, "learning_rate": 0.0001592688756695765, "loss": 0.8518, "step": 89340 }, { "epoch": 1.5686722028125493, "grad_norm": 0.07547871726697879, "learning_rate": 0.00015925986630884687, "loss": 0.8492, "step": 89350 }, { "epoch": 1.5688477676925507, "grad_norm": 0.06967033526518446, "learning_rate": 0.00015925085620995363, "loss": 0.8531, "step": 89360 }, { "epoch": 1.5690233325725522, "grad_norm": 0.09818772962951894, "learning_rate": 0.0001592418453730108, "loss": 0.8557, "step": 89370 }, { "epoch": 1.5691988974525537, "grad_norm": 0.05753860856423985, "learning_rate": 0.00015923283379813265, "loss": 0.8457, "step": 89380 }, { "epoch": 1.569374462332555, "grad_norm": 0.07231264138816644, "learning_rate": 0.00015922382148543336, "loss": 0.8465, "step": 89390 }, { "epoch": 1.5695500272125564, "grad_norm": 0.05732115554831139, "learning_rate": 0.00015921480843502706, "loss": 0.8516, "step": 89400 }, { "epoch": 1.5697255920925577, "grad_norm": 0.08177058162724586, "learning_rate": 0.000159205794647028, "loss": 0.8497, "step": 89410 }, { "epoch": 1.5699011569725592, "grad_norm": 0.058071733135123246, "learning_rate": 0.0001591967801215504, "loss": 0.8424, "step": 89420 }, { "epoch": 1.5700767218525606, "grad_norm": 0.08192167399127567, "learning_rate": 0.0001591877648587084, "loss": 0.8479, "step": 89430 }, { "epoch": 1.5702522867325621, "grad_norm": 0.08388286292140544, "learning_rate": 0.00015917874885861626, "loss": 0.8501, "step": 89440 }, { "epoch": 1.5704278516125634, "grad_norm": 0.07889809089431103, "learning_rate": 0.00015916973212138825, "loss": 0.8483, "step": 89450 }, { "epoch": 1.5706034164925649, "grad_norm": 0.050777020569458375, "learning_rate": 0.00015916071464713857, "loss": 0.8538, "step": 89460 }, { "epoch": 1.570778981372566, "grad_norm": 0.06529639602846891, "learning_rate": 0.00015915169643598148, "loss": 0.8427, "step": 89470 }, { "epoch": 1.5709545462525676, "grad_norm": 0.08184648453335215, "learning_rate": 0.00015914267748803125, "loss": 0.8537, "step": 89480 }, { "epoch": 1.571130111132569, "grad_norm": 0.05959528169391161, "learning_rate": 0.0001591336578034022, "loss": 0.8445, "step": 89490 }, { "epoch": 1.5713056760125705, "grad_norm": 0.04853878681723957, "learning_rate": 0.0001591246373822085, "loss": 0.8429, "step": 89500 }, { "epoch": 1.5714812408925718, "grad_norm": 0.06152299262628816, "learning_rate": 0.00015911561622456458, "loss": 0.8494, "step": 89510 }, { "epoch": 1.5716568057725733, "grad_norm": 0.05247759037694654, "learning_rate": 0.00015910659433058462, "loss": 0.8495, "step": 89520 }, { "epoch": 1.5718323706525745, "grad_norm": 0.05628279628958935, "learning_rate": 0.00015909757170038297, "loss": 0.8478, "step": 89530 }, { "epoch": 1.572007935532576, "grad_norm": 0.04531924732510581, "learning_rate": 0.00015908854833407394, "loss": 0.8597, "step": 89540 }, { "epoch": 1.5721835004125775, "grad_norm": 0.08984047390467315, "learning_rate": 0.00015907952423177188, "loss": 0.849, "step": 89550 }, { "epoch": 1.572359065292579, "grad_norm": 0.06390749096286903, "learning_rate": 0.00015907049939359116, "loss": 0.8351, "step": 89560 }, { "epoch": 1.5725346301725804, "grad_norm": 0.11485183125526226, "learning_rate": 0.00015906147381964604, "loss": 0.8496, "step": 89570 }, { "epoch": 1.5727101950525817, "grad_norm": 0.04914515352020735, "learning_rate": 0.00015905244751005093, "loss": 0.8442, "step": 89580 }, { "epoch": 1.572885759932583, "grad_norm": 0.053491241413736325, "learning_rate": 0.00015904342046492017, "loss": 0.8492, "step": 89590 }, { "epoch": 1.5730613248125844, "grad_norm": 0.08577397729741201, "learning_rate": 0.00015903439268436816, "loss": 0.8486, "step": 89600 }, { "epoch": 1.573236889692586, "grad_norm": 0.06977827278470844, "learning_rate": 0.0001590253641685093, "loss": 0.8612, "step": 89610 }, { "epoch": 1.5734124545725874, "grad_norm": 0.047024881107853266, "learning_rate": 0.0001590163349174579, "loss": 0.8557, "step": 89620 }, { "epoch": 1.5735880194525889, "grad_norm": 0.05163372702844441, "learning_rate": 0.00015900730493132847, "loss": 0.8457, "step": 89630 }, { "epoch": 1.5737635843325901, "grad_norm": 0.05126481068248318, "learning_rate": 0.00015899827421023534, "loss": 0.8478, "step": 89640 }, { "epoch": 1.5739391492125914, "grad_norm": 0.11860950350092861, "learning_rate": 0.00015898924275429296, "loss": 0.8385, "step": 89650 }, { "epoch": 1.5741147140925928, "grad_norm": 0.06080400724719035, "learning_rate": 0.00015898021056361578, "loss": 0.8465, "step": 89660 }, { "epoch": 1.5742902789725943, "grad_norm": 0.05252602091441076, "learning_rate": 0.0001589711776383182, "loss": 0.8539, "step": 89670 }, { "epoch": 1.5744658438525958, "grad_norm": 0.055558391321736136, "learning_rate": 0.0001589621439785147, "loss": 0.8344, "step": 89680 }, { "epoch": 1.5746414087325973, "grad_norm": 0.048554017455228554, "learning_rate": 0.0001589531095843197, "loss": 0.8643, "step": 89690 }, { "epoch": 1.5748169736125985, "grad_norm": 0.07786323951809927, "learning_rate": 0.00015894407445584777, "loss": 0.8608, "step": 89700 }, { "epoch": 1.5749925384925998, "grad_norm": 0.052666706499331396, "learning_rate": 0.0001589350385932132, "loss": 0.8483, "step": 89710 }, { "epoch": 1.5751681033726013, "grad_norm": 0.060375617508938, "learning_rate": 0.00015892600199653067, "loss": 0.8514, "step": 89720 }, { "epoch": 1.5753436682526027, "grad_norm": 0.07132466310747458, "learning_rate": 0.00015891696466591458, "loss": 0.8456, "step": 89730 }, { "epoch": 1.5755192331326042, "grad_norm": 0.07568586055292299, "learning_rate": 0.00015890792660147945, "loss": 0.8564, "step": 89740 }, { "epoch": 1.5756947980126057, "grad_norm": 0.05961020734504238, "learning_rate": 0.00015889888780333978, "loss": 0.844, "step": 89750 }, { "epoch": 1.575870362892607, "grad_norm": 0.049964481026431744, "learning_rate": 0.0001588898482716101, "loss": 0.8391, "step": 89760 }, { "epoch": 1.5760459277726082, "grad_norm": 0.05407445579332524, "learning_rate": 0.00015888080800640497, "loss": 0.8507, "step": 89770 }, { "epoch": 1.5762214926526097, "grad_norm": 0.06506645932523047, "learning_rate": 0.00015887176700783888, "loss": 0.8466, "step": 89780 }, { "epoch": 1.5763970575326112, "grad_norm": 0.05758450640730029, "learning_rate": 0.00015886272527602642, "loss": 0.8512, "step": 89790 }, { "epoch": 1.5765726224126126, "grad_norm": 0.052506611765551124, "learning_rate": 0.00015885368281108215, "loss": 0.8523, "step": 89800 }, { "epoch": 1.5767481872926141, "grad_norm": 0.061253303751228574, "learning_rate": 0.00015884463961312063, "loss": 0.8407, "step": 89810 }, { "epoch": 1.5769237521726154, "grad_norm": 0.052101287067195114, "learning_rate": 0.00015883559568225644, "loss": 0.8495, "step": 89820 }, { "epoch": 1.5770993170526166, "grad_norm": 0.04721081385674925, "learning_rate": 0.00015882655101860416, "loss": 0.8491, "step": 89830 }, { "epoch": 1.5772748819326181, "grad_norm": 0.04598883507952143, "learning_rate": 0.0001588175056222784, "loss": 0.845, "step": 89840 }, { "epoch": 1.5774504468126196, "grad_norm": 0.0523946461932713, "learning_rate": 0.00015880845949339377, "loss": 0.8433, "step": 89850 }, { "epoch": 1.577626011692621, "grad_norm": 0.07913755532499649, "learning_rate": 0.00015879941263206488, "loss": 0.8518, "step": 89860 }, { "epoch": 1.5778015765726225, "grad_norm": 0.057784471720608965, "learning_rate": 0.00015879036503840634, "loss": 0.8454, "step": 89870 }, { "epoch": 1.5779771414526238, "grad_norm": 0.0498389468070698, "learning_rate": 0.00015878131671253284, "loss": 0.8469, "step": 89880 }, { "epoch": 1.5781527063326253, "grad_norm": 0.09276174894668411, "learning_rate": 0.00015877226765455892, "loss": 0.8475, "step": 89890 }, { "epoch": 1.5783282712126265, "grad_norm": 0.0777544995685139, "learning_rate": 0.00015876321786459932, "loss": 0.8507, "step": 89900 }, { "epoch": 1.578503836092628, "grad_norm": 0.0710354551816941, "learning_rate": 0.00015875416734276873, "loss": 0.8457, "step": 89910 }, { "epoch": 1.5786794009726295, "grad_norm": 0.06011961498007995, "learning_rate": 0.00015874511608918174, "loss": 0.8512, "step": 89920 }, { "epoch": 1.578854965852631, "grad_norm": 0.0519436394024867, "learning_rate": 0.00015873606410395303, "loss": 0.8412, "step": 89930 }, { "epoch": 1.5790305307326322, "grad_norm": 0.07501284688683946, "learning_rate": 0.00015872701138719736, "loss": 0.8539, "step": 89940 }, { "epoch": 1.5792060956126337, "grad_norm": 0.08460171056173171, "learning_rate": 0.00015871795793902935, "loss": 0.8504, "step": 89950 }, { "epoch": 1.579381660492635, "grad_norm": 0.06234735586767833, "learning_rate": 0.0001587089037595638, "loss": 0.8506, "step": 89960 }, { "epoch": 1.5795572253726364, "grad_norm": 0.08231478675029717, "learning_rate": 0.00015869984884891536, "loss": 0.8435, "step": 89970 }, { "epoch": 1.579732790252638, "grad_norm": 0.04497837180579472, "learning_rate": 0.00015869079320719875, "loss": 0.8516, "step": 89980 }, { "epoch": 1.5799083551326394, "grad_norm": 0.05788115214336489, "learning_rate": 0.00015868173683452878, "loss": 0.8416, "step": 89990 }, { "epoch": 1.5800839200126406, "grad_norm": 0.053367191804467425, "learning_rate": 0.00015867267973102014, "loss": 0.8475, "step": 90000 }, { "epoch": 1.5802594848926421, "grad_norm": 0.0817991724129242, "learning_rate": 0.00015866362189678756, "loss": 0.8437, "step": 90010 }, { "epoch": 1.5804350497726434, "grad_norm": 0.0592551582884363, "learning_rate": 0.0001586545633319459, "loss": 0.8413, "step": 90020 }, { "epoch": 1.5806106146526449, "grad_norm": 0.0734989714498931, "learning_rate": 0.0001586455040366098, "loss": 0.858, "step": 90030 }, { "epoch": 1.5807861795326463, "grad_norm": 0.04987343159925907, "learning_rate": 0.00015863644401089414, "loss": 0.8474, "step": 90040 }, { "epoch": 1.5809617444126478, "grad_norm": 0.07396840234814855, "learning_rate": 0.00015862738325491372, "loss": 0.8472, "step": 90050 }, { "epoch": 1.581137309292649, "grad_norm": 0.044668026838924145, "learning_rate": 0.0001586183217687833, "loss": 0.8492, "step": 90060 }, { "epoch": 1.5813128741726505, "grad_norm": 0.06952996641361733, "learning_rate": 0.0001586092595526177, "loss": 0.8398, "step": 90070 }, { "epoch": 1.5814884390526518, "grad_norm": 0.08469776901996329, "learning_rate": 0.0001586001966065317, "loss": 0.8413, "step": 90080 }, { "epoch": 1.5816640039326533, "grad_norm": 0.05476052027144533, "learning_rate": 0.00015859113293064024, "loss": 0.8417, "step": 90090 }, { "epoch": 1.5818395688126548, "grad_norm": 0.06509463728255599, "learning_rate": 0.00015858206852505802, "loss": 0.8495, "step": 90100 }, { "epoch": 1.5820151336926562, "grad_norm": 0.08604584871852632, "learning_rate": 0.00015857300338990003, "loss": 0.847, "step": 90110 }, { "epoch": 1.5821906985726575, "grad_norm": 0.06739688704903073, "learning_rate": 0.000158563937525281, "loss": 0.8461, "step": 90120 }, { "epoch": 1.582366263452659, "grad_norm": 0.09777909220835573, "learning_rate": 0.00015855487093131588, "loss": 0.8481, "step": 90130 }, { "epoch": 1.5825418283326602, "grad_norm": 0.05025014392632189, "learning_rate": 0.00015854580360811948, "loss": 0.8436, "step": 90140 }, { "epoch": 1.5827173932126617, "grad_norm": 0.0729243499720481, "learning_rate": 0.0001585367355558067, "loss": 0.8455, "step": 90150 }, { "epoch": 1.5828929580926632, "grad_norm": 0.0482957020443582, "learning_rate": 0.00015852766677449252, "loss": 0.8526, "step": 90160 }, { "epoch": 1.5830685229726646, "grad_norm": 0.054153812546168946, "learning_rate": 0.00015851859726429172, "loss": 0.8479, "step": 90170 }, { "epoch": 1.583244087852666, "grad_norm": 0.08461447668551615, "learning_rate": 0.00015850952702531928, "loss": 0.8451, "step": 90180 }, { "epoch": 1.5834196527326674, "grad_norm": 0.07154162677638357, "learning_rate": 0.00015850045605769015, "loss": 0.8487, "step": 90190 }, { "epoch": 1.5835952176126686, "grad_norm": 0.07594048337357331, "learning_rate": 0.0001584913843615192, "loss": 0.849, "step": 90200 }, { "epoch": 1.5837707824926701, "grad_norm": 0.06999016281372322, "learning_rate": 0.0001584823119369214, "loss": 0.8568, "step": 90210 }, { "epoch": 1.5839463473726716, "grad_norm": 0.050394061084736666, "learning_rate": 0.00015847323878401167, "loss": 0.849, "step": 90220 }, { "epoch": 1.584121912252673, "grad_norm": 0.060138169105124595, "learning_rate": 0.000158464164902905, "loss": 0.8462, "step": 90230 }, { "epoch": 1.5842974771326743, "grad_norm": 0.07027868292364067, "learning_rate": 0.00015845509029371638, "loss": 0.8513, "step": 90240 }, { "epoch": 1.5844730420126758, "grad_norm": 0.04541132588069494, "learning_rate": 0.0001584460149565607, "loss": 0.8526, "step": 90250 }, { "epoch": 1.584648606892677, "grad_norm": 0.06084610774428921, "learning_rate": 0.00015843693889155302, "loss": 0.8468, "step": 90260 }, { "epoch": 1.5848241717726785, "grad_norm": 0.06127498141467736, "learning_rate": 0.00015842786209880832, "loss": 0.8508, "step": 90270 }, { "epoch": 1.58499973665268, "grad_norm": 0.058022499459925124, "learning_rate": 0.00015841878457844162, "loss": 0.8481, "step": 90280 }, { "epoch": 1.5851753015326815, "grad_norm": 0.041820053314453244, "learning_rate": 0.00015840970633056788, "loss": 0.848, "step": 90290 }, { "epoch": 1.585350866412683, "grad_norm": 0.05442291501709164, "learning_rate": 0.00015840062735530218, "loss": 0.8456, "step": 90300 }, { "epoch": 1.5855264312926842, "grad_norm": 0.05165539523610851, "learning_rate": 0.0001583915476527595, "loss": 0.8428, "step": 90310 }, { "epoch": 1.5857019961726855, "grad_norm": 0.0636537012941317, "learning_rate": 0.00015838246722305497, "loss": 0.8536, "step": 90320 }, { "epoch": 1.585877561052687, "grad_norm": 0.05476275584323656, "learning_rate": 0.00015837338606630353, "loss": 0.8473, "step": 90330 }, { "epoch": 1.5860531259326884, "grad_norm": 0.06901978495077143, "learning_rate": 0.0001583643041826203, "loss": 0.858, "step": 90340 }, { "epoch": 1.58622869081269, "grad_norm": 0.05222246542501554, "learning_rate": 0.00015835522157212035, "loss": 0.8578, "step": 90350 }, { "epoch": 1.5864042556926914, "grad_norm": 0.06391788906812292, "learning_rate": 0.00015834613823491878, "loss": 0.8503, "step": 90360 }, { "epoch": 1.5865798205726926, "grad_norm": 0.06548491471328449, "learning_rate": 0.00015833705417113062, "loss": 0.8554, "step": 90370 }, { "epoch": 1.586755385452694, "grad_norm": 0.051357414083936376, "learning_rate": 0.00015832796938087103, "loss": 0.8527, "step": 90380 }, { "epoch": 1.5869309503326954, "grad_norm": 0.05231001878214001, "learning_rate": 0.00015831888386425504, "loss": 0.8503, "step": 90390 }, { "epoch": 1.5871065152126969, "grad_norm": 0.053460475763059005, "learning_rate": 0.00015830979762139778, "loss": 0.8486, "step": 90400 }, { "epoch": 1.5872820800926983, "grad_norm": 0.044879595600270016, "learning_rate": 0.00015830071065241446, "loss": 0.8373, "step": 90410 }, { "epoch": 1.5874576449726998, "grad_norm": 0.07645300618312376, "learning_rate": 0.00015829162295742015, "loss": 0.8407, "step": 90420 }, { "epoch": 1.587633209852701, "grad_norm": 0.04259223793089963, "learning_rate": 0.00015828253453653, "loss": 0.8498, "step": 90430 }, { "epoch": 1.5878087747327023, "grad_norm": 0.0609911649909262, "learning_rate": 0.00015827344538985912, "loss": 0.8511, "step": 90440 }, { "epoch": 1.5879843396127038, "grad_norm": 0.07819172725809735, "learning_rate": 0.00015826435551752273, "loss": 0.8415, "step": 90450 }, { "epoch": 1.5881599044927053, "grad_norm": 0.058382885605504156, "learning_rate": 0.00015825526491963597, "loss": 0.8425, "step": 90460 }, { "epoch": 1.5883354693727068, "grad_norm": 0.052316011791255106, "learning_rate": 0.00015824617359631402, "loss": 0.8493, "step": 90470 }, { "epoch": 1.5885110342527082, "grad_norm": 0.07705183126547047, "learning_rate": 0.0001582370815476721, "loss": 0.851, "step": 90480 }, { "epoch": 1.5886865991327095, "grad_norm": 0.06972954839739358, "learning_rate": 0.0001582279887738254, "loss": 0.8548, "step": 90490 }, { "epoch": 1.5888621640127107, "grad_norm": 0.08106907011150026, "learning_rate": 0.0001582188952748891, "loss": 0.8479, "step": 90500 }, { "epoch": 1.5890377288927122, "grad_norm": 0.05155107819420603, "learning_rate": 0.00015820980105097845, "loss": 0.8603, "step": 90510 }, { "epoch": 1.5892132937727137, "grad_norm": 0.051467485906838036, "learning_rate": 0.0001582007061022086, "loss": 0.85, "step": 90520 }, { "epoch": 1.5893888586527152, "grad_norm": 0.05002601541806635, "learning_rate": 0.00015819161042869491, "loss": 0.8492, "step": 90530 }, { "epoch": 1.5895644235327167, "grad_norm": 0.07646383146857179, "learning_rate": 0.00015818251403055254, "loss": 0.8531, "step": 90540 }, { "epoch": 1.589739988412718, "grad_norm": 0.07916278862989352, "learning_rate": 0.00015817341690789672, "loss": 0.8509, "step": 90550 }, { "epoch": 1.5899155532927192, "grad_norm": 0.0685491624118804, "learning_rate": 0.00015816431906084277, "loss": 0.8508, "step": 90560 }, { "epoch": 1.5900911181727206, "grad_norm": 0.05355526453782794, "learning_rate": 0.00015815522048950596, "loss": 0.8481, "step": 90570 }, { "epoch": 1.5902666830527221, "grad_norm": 0.0517058665443025, "learning_rate": 0.00015814612119400154, "loss": 0.8491, "step": 90580 }, { "epoch": 1.5904422479327236, "grad_norm": 0.05248230055126385, "learning_rate": 0.00015813702117444483, "loss": 0.8346, "step": 90590 }, { "epoch": 1.590617812812725, "grad_norm": 0.05603836946452892, "learning_rate": 0.00015812792043095111, "loss": 0.8497, "step": 90600 }, { "epoch": 1.5907933776927263, "grad_norm": 0.07966594058521918, "learning_rate": 0.00015811881896363563, "loss": 0.8551, "step": 90610 }, { "epoch": 1.5909689425727278, "grad_norm": 0.05456411351032947, "learning_rate": 0.00015810971677261384, "loss": 0.856, "step": 90620 }, { "epoch": 1.591144507452729, "grad_norm": 0.06167278567269861, "learning_rate": 0.0001581006138580009, "loss": 0.8545, "step": 90630 }, { "epoch": 1.5913200723327305, "grad_norm": 0.051146027285322485, "learning_rate": 0.00015809151021991234, "loss": 0.8506, "step": 90640 }, { "epoch": 1.591495637212732, "grad_norm": 0.061096621395542554, "learning_rate": 0.00015808240585846338, "loss": 0.8477, "step": 90650 }, { "epoch": 1.5916712020927335, "grad_norm": 0.05167251227375376, "learning_rate": 0.00015807330077376936, "loss": 0.8392, "step": 90660 }, { "epoch": 1.5918467669727348, "grad_norm": 0.06989966893018146, "learning_rate": 0.00015806419496594573, "loss": 0.8486, "step": 90670 }, { "epoch": 1.5920223318527362, "grad_norm": 0.05048650369548772, "learning_rate": 0.00015805508843510778, "loss": 0.8423, "step": 90680 }, { "epoch": 1.5921978967327375, "grad_norm": 0.05847404539567389, "learning_rate": 0.00015804598118137093, "loss": 0.8514, "step": 90690 }, { "epoch": 1.592373461612739, "grad_norm": 0.05732545183119213, "learning_rate": 0.0001580368732048506, "loss": 0.8448, "step": 90700 }, { "epoch": 1.5925490264927404, "grad_norm": 0.08368328745926819, "learning_rate": 0.00015802776450566207, "loss": 0.8486, "step": 90710 }, { "epoch": 1.592724591372742, "grad_norm": 0.05724524333472676, "learning_rate": 0.00015801865508392092, "loss": 0.8438, "step": 90720 }, { "epoch": 1.5929001562527432, "grad_norm": 0.051032603445990345, "learning_rate": 0.00015800954493974247, "loss": 0.8526, "step": 90730 }, { "epoch": 1.5930757211327446, "grad_norm": 0.05779852063069735, "learning_rate": 0.00015800043407324213, "loss": 0.849, "step": 90740 }, { "epoch": 1.593251286012746, "grad_norm": 0.06966093090000702, "learning_rate": 0.00015799132248453537, "loss": 0.846, "step": 90750 }, { "epoch": 1.5934268508927474, "grad_norm": 0.0637014725410287, "learning_rate": 0.00015798221017373763, "loss": 0.8439, "step": 90760 }, { "epoch": 1.5936024157727489, "grad_norm": 0.05897383169779125, "learning_rate": 0.0001579730971409644, "loss": 0.8459, "step": 90770 }, { "epoch": 1.5937779806527503, "grad_norm": 0.06346822325337578, "learning_rate": 0.00015796398338633108, "loss": 0.8579, "step": 90780 }, { "epoch": 1.5939535455327516, "grad_norm": 0.06190303178759484, "learning_rate": 0.00015795486890995318, "loss": 0.851, "step": 90790 }, { "epoch": 1.594129110412753, "grad_norm": 0.05339750947779269, "learning_rate": 0.00015794575371194617, "loss": 0.8509, "step": 90800 }, { "epoch": 1.5943046752927543, "grad_norm": 0.0622686181858218, "learning_rate": 0.00015793663779242552, "loss": 0.8503, "step": 90810 }, { "epoch": 1.5944802401727558, "grad_norm": 0.05135955920579832, "learning_rate": 0.00015792752115150683, "loss": 0.8417, "step": 90820 }, { "epoch": 1.5946558050527573, "grad_norm": 0.060281370214628725, "learning_rate": 0.0001579184037893055, "loss": 0.8471, "step": 90830 }, { "epoch": 1.5948313699327588, "grad_norm": 0.06177625638115615, "learning_rate": 0.0001579092857059371, "loss": 0.8488, "step": 90840 }, { "epoch": 1.59500693481276, "grad_norm": 0.06402918011686869, "learning_rate": 0.0001579001669015171, "loss": 0.8467, "step": 90850 }, { "epoch": 1.5951824996927615, "grad_norm": 0.07926249988595808, "learning_rate": 0.00015789104737616108, "loss": 0.8411, "step": 90860 }, { "epoch": 1.5953580645727627, "grad_norm": 0.05404522707959022, "learning_rate": 0.00015788192712998467, "loss": 0.8512, "step": 90870 }, { "epoch": 1.5955336294527642, "grad_norm": 0.06804284931719541, "learning_rate": 0.00015787280616310327, "loss": 0.8461, "step": 90880 }, { "epoch": 1.5957091943327657, "grad_norm": 0.04455488311985065, "learning_rate": 0.0001578636844756325, "loss": 0.8558, "step": 90890 }, { "epoch": 1.5958847592127672, "grad_norm": 0.05747085437095737, "learning_rate": 0.00015785456206768802, "loss": 0.856, "step": 90900 }, { "epoch": 1.5960603240927684, "grad_norm": 0.07255860330326162, "learning_rate": 0.00015784543893938526, "loss": 0.8441, "step": 90910 }, { "epoch": 1.59623588897277, "grad_norm": 0.0582782475689912, "learning_rate": 0.00015783631509083996, "loss": 0.8515, "step": 90920 }, { "epoch": 1.5964114538527712, "grad_norm": 0.07167422844264902, "learning_rate": 0.00015782719052216763, "loss": 0.8474, "step": 90930 }, { "epoch": 1.5965870187327726, "grad_norm": 0.07363454923855306, "learning_rate": 0.00015781806523348392, "loss": 0.8499, "step": 90940 }, { "epoch": 1.5967625836127741, "grad_norm": 0.0694143857759646, "learning_rate": 0.0001578089392249044, "loss": 0.8544, "step": 90950 }, { "epoch": 1.5969381484927756, "grad_norm": 0.04751511555202065, "learning_rate": 0.0001577998124965448, "loss": 0.8439, "step": 90960 }, { "epoch": 1.5971137133727769, "grad_norm": 0.06452048517569504, "learning_rate": 0.00015779068504852061, "loss": 0.843, "step": 90970 }, { "epoch": 1.5972892782527783, "grad_norm": 0.0677086073498115, "learning_rate": 0.0001577815568809476, "loss": 0.8557, "step": 90980 }, { "epoch": 1.5974648431327796, "grad_norm": 0.0947987057938556, "learning_rate": 0.00015777242799394137, "loss": 0.8424, "step": 90990 }, { "epoch": 1.597640408012781, "grad_norm": 0.060467064045625164, "learning_rate": 0.00015776329838761762, "loss": 0.8485, "step": 91000 }, { "epoch": 1.5978159728927825, "grad_norm": 0.05240289682728228, "learning_rate": 0.000157754168062092, "loss": 0.8554, "step": 91010 }, { "epoch": 1.597991537772784, "grad_norm": 0.06183856245702646, "learning_rate": 0.00015774503701748013, "loss": 0.8509, "step": 91020 }, { "epoch": 1.5981671026527855, "grad_norm": 0.057817412017159266, "learning_rate": 0.0001577359052538978, "loss": 0.8439, "step": 91030 }, { "epoch": 1.5983426675327868, "grad_norm": 0.07869308131096858, "learning_rate": 0.00015772677277146074, "loss": 0.8538, "step": 91040 }, { "epoch": 1.598518232412788, "grad_norm": 0.06734035881065863, "learning_rate": 0.0001577176395702845, "loss": 0.8497, "step": 91050 }, { "epoch": 1.5986937972927895, "grad_norm": 0.046347515199865214, "learning_rate": 0.00015770850565048497, "loss": 0.8543, "step": 91060 }, { "epoch": 1.598869362172791, "grad_norm": 0.06802572241749714, "learning_rate": 0.00015769937101217776, "loss": 0.8474, "step": 91070 }, { "epoch": 1.5990449270527924, "grad_norm": 0.05088480739225804, "learning_rate": 0.00015769023565547868, "loss": 0.8522, "step": 91080 }, { "epoch": 1.599220491932794, "grad_norm": 0.05682588099678468, "learning_rate": 0.00015768109958050344, "loss": 0.8509, "step": 91090 }, { "epoch": 1.5993960568127952, "grad_norm": 0.07077703487044108, "learning_rate": 0.0001576719627873678, "loss": 0.849, "step": 91100 }, { "epoch": 1.5995716216927964, "grad_norm": 0.04634585797972743, "learning_rate": 0.0001576628252761875, "loss": 0.848, "step": 91110 }, { "epoch": 1.599747186572798, "grad_norm": 0.06994160622375067, "learning_rate": 0.0001576536870470784, "loss": 0.8537, "step": 91120 }, { "epoch": 1.5999227514527994, "grad_norm": 0.05000514765939688, "learning_rate": 0.00015764454810015624, "loss": 0.843, "step": 91130 }, { "epoch": 1.6000983163328009, "grad_norm": 0.06971450206658995, "learning_rate": 0.00015763540843553674, "loss": 0.8522, "step": 91140 }, { "epoch": 1.6002738812128023, "grad_norm": 0.055795072894269146, "learning_rate": 0.0001576262680533358, "loss": 0.8531, "step": 91150 }, { "epoch": 1.6004494460928036, "grad_norm": 0.053088558520548085, "learning_rate": 0.00015761712695366918, "loss": 0.8439, "step": 91160 }, { "epoch": 1.6006250109728049, "grad_norm": 0.05264633628137883, "learning_rate": 0.0001576079851366527, "loss": 0.85, "step": 91170 }, { "epoch": 1.6008005758528063, "grad_norm": 0.04918563515554702, "learning_rate": 0.00015759884260240222, "loss": 0.8369, "step": 91180 }, { "epoch": 1.6009761407328078, "grad_norm": 0.09549794202954581, "learning_rate": 0.00015758969935103357, "loss": 0.8474, "step": 91190 }, { "epoch": 1.6011517056128093, "grad_norm": 0.08867892240997448, "learning_rate": 0.0001575805553826625, "loss": 0.8573, "step": 91200 }, { "epoch": 1.6013272704928108, "grad_norm": 0.06349763283582022, "learning_rate": 0.00015757141069740505, "loss": 0.853, "step": 91210 }, { "epoch": 1.601502835372812, "grad_norm": 0.06303406060857004, "learning_rate": 0.00015756226529537696, "loss": 0.8535, "step": 91220 }, { "epoch": 1.6016784002528133, "grad_norm": 0.04368385586802328, "learning_rate": 0.00015755311917669412, "loss": 0.8448, "step": 91230 }, { "epoch": 1.6018539651328147, "grad_norm": 0.06494193775489887, "learning_rate": 0.0001575439723414724, "loss": 0.85, "step": 91240 }, { "epoch": 1.6020295300128162, "grad_norm": 0.07372031348155193, "learning_rate": 0.00015753482478982772, "loss": 0.8377, "step": 91250 }, { "epoch": 1.6022050948928177, "grad_norm": 0.04910391632707975, "learning_rate": 0.000157525676521876, "loss": 0.8403, "step": 91260 }, { "epoch": 1.6023806597728192, "grad_norm": 0.07275912593923371, "learning_rate": 0.0001575165275377331, "loss": 0.844, "step": 91270 }, { "epoch": 1.6025562246528204, "grad_norm": 0.07815117869557738, "learning_rate": 0.00015750737783751497, "loss": 0.844, "step": 91280 }, { "epoch": 1.6027317895328217, "grad_norm": 0.045732170865893214, "learning_rate": 0.00015749822742133754, "loss": 0.8458, "step": 91290 }, { "epoch": 1.6029073544128232, "grad_norm": 0.05627871712950593, "learning_rate": 0.00015748907628931674, "loss": 0.8455, "step": 91300 }, { "epoch": 1.6030829192928246, "grad_norm": 0.09796676722758486, "learning_rate": 0.00015747992444156852, "loss": 0.848, "step": 91310 }, { "epoch": 1.6032584841728261, "grad_norm": 0.05365628939681921, "learning_rate": 0.00015747077187820883, "loss": 0.8481, "step": 91320 }, { "epoch": 1.6034340490528276, "grad_norm": 0.058556066450472864, "learning_rate": 0.00015746161859935361, "loss": 0.8493, "step": 91330 }, { "epoch": 1.6036096139328289, "grad_norm": 0.06287460081476147, "learning_rate": 0.0001574524646051189, "loss": 0.8474, "step": 91340 }, { "epoch": 1.6037851788128303, "grad_norm": 0.05012901716026252, "learning_rate": 0.00015744330989562064, "loss": 0.8462, "step": 91350 }, { "epoch": 1.6039607436928316, "grad_norm": 0.05166106862436587, "learning_rate": 0.00015743415447097483, "loss": 0.8523, "step": 91360 }, { "epoch": 1.604136308572833, "grad_norm": 0.060191452760553654, "learning_rate": 0.00015742499833129743, "loss": 0.8552, "step": 91370 }, { "epoch": 1.6043118734528345, "grad_norm": 0.06475776157527671, "learning_rate": 0.00015741584147670453, "loss": 0.8433, "step": 91380 }, { "epoch": 1.604487438332836, "grad_norm": 0.06525077659441123, "learning_rate": 0.0001574066839073121, "loss": 0.846, "step": 91390 }, { "epoch": 1.6046630032128373, "grad_norm": 0.05821473694129956, "learning_rate": 0.00015739752562323616, "loss": 0.8609, "step": 91400 }, { "epoch": 1.6048385680928388, "grad_norm": 0.05945963092466336, "learning_rate": 0.00015738836662459278, "loss": 0.8446, "step": 91410 }, { "epoch": 1.60501413297284, "grad_norm": 0.05246507726981029, "learning_rate": 0.00015737920691149797, "loss": 0.8499, "step": 91420 }, { "epoch": 1.6051896978528415, "grad_norm": 0.05555310303098996, "learning_rate": 0.0001573700464840678, "loss": 0.8549, "step": 91430 }, { "epoch": 1.605365262732843, "grad_norm": 0.049754416506118025, "learning_rate": 0.00015736088534241835, "loss": 0.8493, "step": 91440 }, { "epoch": 1.6055408276128444, "grad_norm": 0.0669976484130464, "learning_rate": 0.00015735172348666567, "loss": 0.8455, "step": 91450 }, { "epoch": 1.6057163924928457, "grad_norm": 0.0519056548940216, "learning_rate": 0.00015734256091692585, "loss": 0.8525, "step": 91460 }, { "epoch": 1.6058919573728472, "grad_norm": 0.06012237694817399, "learning_rate": 0.000157333397633315, "loss": 0.8507, "step": 91470 }, { "epoch": 1.6060675222528484, "grad_norm": 0.05523918885921821, "learning_rate": 0.0001573242336359492, "loss": 0.8527, "step": 91480 }, { "epoch": 1.60624308713285, "grad_norm": 0.06273448362501817, "learning_rate": 0.00015731506892494458, "loss": 0.8479, "step": 91490 }, { "epoch": 1.6064186520128514, "grad_norm": 0.07332672344257368, "learning_rate": 0.00015730590350041722, "loss": 0.8472, "step": 91500 }, { "epoch": 1.6065942168928529, "grad_norm": 0.0711517682984081, "learning_rate": 0.00015729673736248327, "loss": 0.8485, "step": 91510 }, { "epoch": 1.6067697817728541, "grad_norm": 0.06142207533967349, "learning_rate": 0.0001572875705112589, "loss": 0.8475, "step": 91520 }, { "epoch": 1.6069453466528556, "grad_norm": 0.09750607802192332, "learning_rate": 0.0001572784029468602, "loss": 0.8488, "step": 91530 }, { "epoch": 1.6071209115328569, "grad_norm": 0.058634920511593684, "learning_rate": 0.00015726923466940332, "loss": 0.8513, "step": 91540 }, { "epoch": 1.6072964764128583, "grad_norm": 0.06017798755646878, "learning_rate": 0.00015726006567900452, "loss": 0.8473, "step": 91550 }, { "epoch": 1.6074720412928598, "grad_norm": 0.08858183466123853, "learning_rate": 0.00015725089597577987, "loss": 0.844, "step": 91560 }, { "epoch": 1.6076476061728613, "grad_norm": 0.07433450639938674, "learning_rate": 0.00015724172555984562, "loss": 0.8519, "step": 91570 }, { "epoch": 1.6078231710528625, "grad_norm": 0.04953709744656424, "learning_rate": 0.0001572325544313179, "loss": 0.8491, "step": 91580 }, { "epoch": 1.607998735932864, "grad_norm": 0.052256940816319195, "learning_rate": 0.00015722338259031293, "loss": 0.8528, "step": 91590 }, { "epoch": 1.6081743008128653, "grad_norm": 0.05201197726534871, "learning_rate": 0.00015721421003694693, "loss": 0.8433, "step": 91600 }, { "epoch": 1.6083498656928668, "grad_norm": 0.05651357186059328, "learning_rate": 0.0001572050367713361, "loss": 0.8452, "step": 91610 }, { "epoch": 1.6085254305728682, "grad_norm": 0.0632188573427134, "learning_rate": 0.00015719586279359671, "loss": 0.8532, "step": 91620 }, { "epoch": 1.6087009954528697, "grad_norm": 0.050272765852413456, "learning_rate": 0.00015718668810384495, "loss": 0.8443, "step": 91630 }, { "epoch": 1.608876560332871, "grad_norm": 0.04797464557409146, "learning_rate": 0.00015717751270219712, "loss": 0.8453, "step": 91640 }, { "epoch": 1.6090521252128724, "grad_norm": 0.058121839063618716, "learning_rate": 0.0001571683365887694, "loss": 0.8502, "step": 91650 }, { "epoch": 1.6092276900928737, "grad_norm": 0.061265995566192576, "learning_rate": 0.00015715915976367812, "loss": 0.8491, "step": 91660 }, { "epoch": 1.6094032549728752, "grad_norm": 0.07880892483378525, "learning_rate": 0.00015714998222703949, "loss": 0.8484, "step": 91670 }, { "epoch": 1.6095788198528767, "grad_norm": 0.05354571666830261, "learning_rate": 0.00015714080397896985, "loss": 0.8568, "step": 91680 }, { "epoch": 1.6097543847328781, "grad_norm": 0.07556497106677107, "learning_rate": 0.00015713162501958546, "loss": 0.8459, "step": 91690 }, { "epoch": 1.6099299496128794, "grad_norm": 0.057510117989226485, "learning_rate": 0.0001571224453490026, "loss": 0.8546, "step": 91700 }, { "epoch": 1.6101055144928809, "grad_norm": 0.0742856290567842, "learning_rate": 0.00015711326496733763, "loss": 0.8449, "step": 91710 }, { "epoch": 1.6102810793728821, "grad_norm": 0.058729546233274026, "learning_rate": 0.00015710408387470683, "loss": 0.8437, "step": 91720 }, { "epoch": 1.6104566442528836, "grad_norm": 0.06802876162784813, "learning_rate": 0.0001570949020712265, "loss": 0.8444, "step": 91730 }, { "epoch": 1.610632209132885, "grad_norm": 0.04834947761171741, "learning_rate": 0.00015708571955701307, "loss": 0.8508, "step": 91740 }, { "epoch": 1.6108077740128866, "grad_norm": 0.1018302778677276, "learning_rate": 0.00015707653633218277, "loss": 0.8499, "step": 91750 }, { "epoch": 1.610983338892888, "grad_norm": 0.05523449964262309, "learning_rate": 0.0001570673523968521, "loss": 0.848, "step": 91760 }, { "epoch": 1.6111589037728893, "grad_norm": 0.05754321803079446, "learning_rate": 0.0001570581677511372, "loss": 0.8516, "step": 91770 }, { "epoch": 1.6113344686528905, "grad_norm": 0.07653230022556887, "learning_rate": 0.00015704898239515464, "loss": 0.8434, "step": 91780 }, { "epoch": 1.611510033532892, "grad_norm": 0.05225267788296726, "learning_rate": 0.00015703979632902078, "loss": 0.8528, "step": 91790 }, { "epoch": 1.6116855984128935, "grad_norm": 0.06205477766080048, "learning_rate": 0.00015703060955285186, "loss": 0.8348, "step": 91800 }, { "epoch": 1.611861163292895, "grad_norm": 0.0888610816851985, "learning_rate": 0.00015702142206676443, "loss": 0.8474, "step": 91810 }, { "epoch": 1.6120367281728964, "grad_norm": 0.052633991622013314, "learning_rate": 0.00015701223387087486, "loss": 0.8443, "step": 91820 }, { "epoch": 1.6122122930528977, "grad_norm": 0.06485789066747355, "learning_rate": 0.0001570030449652995, "loss": 0.8514, "step": 91830 }, { "epoch": 1.612387857932899, "grad_norm": 0.06827918884383587, "learning_rate": 0.0001569938553501549, "loss": 0.8514, "step": 91840 }, { "epoch": 1.6125634228129004, "grad_norm": 0.062248641692512714, "learning_rate": 0.00015698466502555737, "loss": 0.8532, "step": 91850 }, { "epoch": 1.612738987692902, "grad_norm": 0.0733468429482594, "learning_rate": 0.0001569754739916234, "loss": 0.8394, "step": 91860 }, { "epoch": 1.6129145525729034, "grad_norm": 0.04796396540110186, "learning_rate": 0.00015696628224846947, "loss": 0.8477, "step": 91870 }, { "epoch": 1.6130901174529049, "grad_norm": 0.04677555675843478, "learning_rate": 0.00015695708979621205, "loss": 0.8538, "step": 91880 }, { "epoch": 1.6132656823329061, "grad_norm": 0.05745246204054577, "learning_rate": 0.00015694789663496753, "loss": 0.85, "step": 91890 }, { "epoch": 1.6134412472129074, "grad_norm": 0.07501941437791308, "learning_rate": 0.00015693870276485244, "loss": 0.8503, "step": 91900 }, { "epoch": 1.6136168120929089, "grad_norm": 0.04504932900697843, "learning_rate": 0.0001569295081859833, "loss": 0.8494, "step": 91910 }, { "epoch": 1.6137923769729103, "grad_norm": 0.07867292998033575, "learning_rate": 0.00015692031289847653, "loss": 0.8493, "step": 91920 }, { "epoch": 1.6139679418529118, "grad_norm": 0.05457835176155886, "learning_rate": 0.00015691111690244872, "loss": 0.8443, "step": 91930 }, { "epoch": 1.6141435067329133, "grad_norm": 0.06951346885478155, "learning_rate": 0.0001569019201980163, "loss": 0.8505, "step": 91940 }, { "epoch": 1.6143190716129145, "grad_norm": 0.05311360481657236, "learning_rate": 0.00015689272278529586, "loss": 0.8478, "step": 91950 }, { "epoch": 1.6144946364929158, "grad_norm": 0.06862041612558054, "learning_rate": 0.0001568835246644039, "loss": 0.8364, "step": 91960 }, { "epoch": 1.6146702013729173, "grad_norm": 0.05171109398779789, "learning_rate": 0.000156874325835457, "loss": 0.8528, "step": 91970 }, { "epoch": 1.6148457662529188, "grad_norm": 0.050311046416699666, "learning_rate": 0.00015686512629857165, "loss": 0.8468, "step": 91980 }, { "epoch": 1.6150213311329202, "grad_norm": 0.05678179343155231, "learning_rate": 0.00015685592605386447, "loss": 0.8504, "step": 91990 }, { "epoch": 1.6151968960129217, "grad_norm": 0.08147487310892643, "learning_rate": 0.00015684672510145198, "loss": 0.8537, "step": 92000 }, { "epoch": 1.615372460892923, "grad_norm": 0.059483420563188075, "learning_rate": 0.00015683752344145078, "loss": 0.8438, "step": 92010 }, { "epoch": 1.6155480257729242, "grad_norm": 0.0709812267798307, "learning_rate": 0.00015682832107397743, "loss": 0.8445, "step": 92020 }, { "epoch": 1.6157235906529257, "grad_norm": 0.0491997444644071, "learning_rate": 0.0001568191179991486, "loss": 0.8549, "step": 92030 }, { "epoch": 1.6158991555329272, "grad_norm": 0.050884178669810674, "learning_rate": 0.00015680991421708083, "loss": 0.8444, "step": 92040 }, { "epoch": 1.6160747204129287, "grad_norm": 0.05110720954694618, "learning_rate": 0.00015680070972789076, "loss": 0.8531, "step": 92050 }, { "epoch": 1.6162502852929301, "grad_norm": 0.06770730895974163, "learning_rate": 0.00015679150453169502, "loss": 0.842, "step": 92060 }, { "epoch": 1.6164258501729314, "grad_norm": 0.07627702140078982, "learning_rate": 0.0001567822986286102, "loss": 0.8462, "step": 92070 }, { "epoch": 1.6166014150529329, "grad_norm": 0.05620232437980508, "learning_rate": 0.000156773092018753, "loss": 0.8609, "step": 92080 }, { "epoch": 1.6167769799329341, "grad_norm": 0.04468474342703498, "learning_rate": 0.00015676388470224, "loss": 0.8482, "step": 92090 }, { "epoch": 1.6169525448129356, "grad_norm": 0.06187641558994192, "learning_rate": 0.00015675467667918789, "loss": 0.8522, "step": 92100 }, { "epoch": 1.617128109692937, "grad_norm": 0.08808014540767041, "learning_rate": 0.00015674546794971334, "loss": 0.8448, "step": 92110 }, { "epoch": 1.6173036745729386, "grad_norm": 0.051657050214011006, "learning_rate": 0.00015673625851393304, "loss": 0.8415, "step": 92120 }, { "epoch": 1.6174792394529398, "grad_norm": 0.0653644526622678, "learning_rate": 0.00015672704837196367, "loss": 0.8507, "step": 92130 }, { "epoch": 1.6176548043329413, "grad_norm": 0.05206445223771713, "learning_rate": 0.0001567178375239219, "loss": 0.8435, "step": 92140 }, { "epoch": 1.6178303692129425, "grad_norm": 0.08734013950763092, "learning_rate": 0.00015670862596992447, "loss": 0.8491, "step": 92150 }, { "epoch": 1.618005934092944, "grad_norm": 0.05811441579998736, "learning_rate": 0.00015669941371008805, "loss": 0.8477, "step": 92160 }, { "epoch": 1.6181814989729455, "grad_norm": 0.04488625629155584, "learning_rate": 0.0001566902007445294, "loss": 0.8479, "step": 92170 }, { "epoch": 1.618357063852947, "grad_norm": 0.06018926909245393, "learning_rate": 0.0001566809870733652, "loss": 0.8414, "step": 92180 }, { "epoch": 1.6185326287329482, "grad_norm": 0.07502576798546222, "learning_rate": 0.00015667177269671224, "loss": 0.8491, "step": 92190 }, { "epoch": 1.6187081936129497, "grad_norm": 0.09269385118498787, "learning_rate": 0.0001566625576146873, "loss": 0.8454, "step": 92200 }, { "epoch": 1.618883758492951, "grad_norm": 0.05277418204238713, "learning_rate": 0.00015665334182740702, "loss": 0.8457, "step": 92210 }, { "epoch": 1.6190593233729524, "grad_norm": 0.057901920981282426, "learning_rate": 0.00015664412533498826, "loss": 0.8372, "step": 92220 }, { "epoch": 1.619234888252954, "grad_norm": 0.05391818838765353, "learning_rate": 0.0001566349081375478, "loss": 0.8534, "step": 92230 }, { "epoch": 1.6194104531329554, "grad_norm": 0.05655681662066759, "learning_rate": 0.00015662569023520233, "loss": 0.8482, "step": 92240 }, { "epoch": 1.6195860180129567, "grad_norm": 0.054086050835373135, "learning_rate": 0.00015661647162806873, "loss": 0.846, "step": 92250 }, { "epoch": 1.6197615828929581, "grad_norm": 0.05205043246701837, "learning_rate": 0.0001566072523162638, "loss": 0.8568, "step": 92260 }, { "epoch": 1.6199371477729594, "grad_norm": 0.10623413832666874, "learning_rate": 0.0001565980322999043, "loss": 0.8468, "step": 92270 }, { "epoch": 1.6201127126529609, "grad_norm": 0.06000227901884539, "learning_rate": 0.0001565888115791071, "loss": 0.8464, "step": 92280 }, { "epoch": 1.6202882775329623, "grad_norm": 0.04775154507919116, "learning_rate": 0.00015657959015398897, "loss": 0.8459, "step": 92290 }, { "epoch": 1.6204638424129638, "grad_norm": 0.06069640780448736, "learning_rate": 0.0001565703680246668, "loss": 0.8469, "step": 92300 }, { "epoch": 1.620639407292965, "grad_norm": 0.051700307545915654, "learning_rate": 0.0001565611451912574, "loss": 0.8503, "step": 92310 }, { "epoch": 1.6208149721729666, "grad_norm": 0.051811252949011004, "learning_rate": 0.00015655192165387767, "loss": 0.8448, "step": 92320 }, { "epoch": 1.6209905370529678, "grad_norm": 0.05988035092879162, "learning_rate": 0.00015654269741264442, "loss": 0.8467, "step": 92330 }, { "epoch": 1.6211661019329693, "grad_norm": 0.052679319105170645, "learning_rate": 0.0001565334724676746, "loss": 0.8516, "step": 92340 }, { "epoch": 1.6213416668129708, "grad_norm": 0.05981028271487582, "learning_rate": 0.00015652424681908498, "loss": 0.8418, "step": 92350 }, { "epoch": 1.6215172316929722, "grad_norm": 0.04999591846751715, "learning_rate": 0.00015651502046699252, "loss": 0.8503, "step": 92360 }, { "epoch": 1.6216927965729735, "grad_norm": 0.054951380721945775, "learning_rate": 0.00015650579341151415, "loss": 0.8429, "step": 92370 }, { "epoch": 1.621868361452975, "grad_norm": 0.062003664449568646, "learning_rate": 0.0001564965656527667, "loss": 0.8426, "step": 92380 }, { "epoch": 1.6220439263329762, "grad_norm": 0.04788974130819218, "learning_rate": 0.00015648733719086713, "loss": 0.8401, "step": 92390 }, { "epoch": 1.6222194912129777, "grad_norm": 0.05832114099005633, "learning_rate": 0.0001564781080259324, "loss": 0.8502, "step": 92400 }, { "epoch": 1.6223950560929792, "grad_norm": 0.05592410725133814, "learning_rate": 0.00015646887815807937, "loss": 0.8557, "step": 92410 }, { "epoch": 1.6225706209729807, "grad_norm": 0.07455191797722206, "learning_rate": 0.00015645964758742505, "loss": 0.8497, "step": 92420 }, { "epoch": 1.622746185852982, "grad_norm": 0.06064521646808161, "learning_rate": 0.00015645041631408638, "loss": 0.8466, "step": 92430 }, { "epoch": 1.6229217507329834, "grad_norm": 0.08176374048337834, "learning_rate": 0.00015644118433818025, "loss": 0.8439, "step": 92440 }, { "epoch": 1.6230973156129846, "grad_norm": 0.05716780033480381, "learning_rate": 0.0001564319516598237, "loss": 0.8537, "step": 92450 }, { "epoch": 1.6232728804929861, "grad_norm": 0.059353820442681365, "learning_rate": 0.00015642271827913374, "loss": 0.8402, "step": 92460 }, { "epoch": 1.6234484453729876, "grad_norm": 0.0950327231947554, "learning_rate": 0.0001564134841962273, "loss": 0.849, "step": 92470 }, { "epoch": 1.623624010252989, "grad_norm": 0.06730761709434536, "learning_rate": 0.00015640424941122137, "loss": 0.8488, "step": 92480 }, { "epoch": 1.6237995751329906, "grad_norm": 0.05946700461846626, "learning_rate": 0.00015639501392423303, "loss": 0.8522, "step": 92490 }, { "epoch": 1.6239751400129918, "grad_norm": 0.07683308542570587, "learning_rate": 0.0001563857777353792, "loss": 0.8437, "step": 92500 }, { "epoch": 1.624150704892993, "grad_norm": 0.05460451362865213, "learning_rate": 0.00015637654084477699, "loss": 0.8509, "step": 92510 }, { "epoch": 1.6243262697729945, "grad_norm": 0.06098668570077405, "learning_rate": 0.00015636730325254339, "loss": 0.8461, "step": 92520 }, { "epoch": 1.624501834652996, "grad_norm": 0.04020228689400136, "learning_rate": 0.00015635806495879542, "loss": 0.8501, "step": 92530 }, { "epoch": 1.6246773995329975, "grad_norm": 0.06338296593068658, "learning_rate": 0.0001563488259636502, "loss": 0.8474, "step": 92540 }, { "epoch": 1.624852964412999, "grad_norm": 0.06315611513719134, "learning_rate": 0.0001563395862672247, "loss": 0.8652, "step": 92550 }, { "epoch": 1.6250285292930002, "grad_norm": 0.07746296093124516, "learning_rate": 0.00015633034586963603, "loss": 0.8461, "step": 92560 }, { "epoch": 1.6252040941730015, "grad_norm": 0.061099130381150356, "learning_rate": 0.00015632110477100132, "loss": 0.8535, "step": 92570 }, { "epoch": 1.625379659053003, "grad_norm": 0.044475952703707945, "learning_rate": 0.0001563118629714376, "loss": 0.8471, "step": 92580 }, { "epoch": 1.6255552239330044, "grad_norm": 0.05392082821257424, "learning_rate": 0.00015630262047106196, "loss": 0.8482, "step": 92590 }, { "epoch": 1.625730788813006, "grad_norm": 0.047080834990325074, "learning_rate": 0.00015629337726999154, "loss": 0.8563, "step": 92600 }, { "epoch": 1.6259063536930074, "grad_norm": 0.04985045811453559, "learning_rate": 0.0001562841333683434, "loss": 0.8487, "step": 92610 }, { "epoch": 1.6260819185730087, "grad_norm": 0.043029943669385715, "learning_rate": 0.00015627488876623474, "loss": 0.8445, "step": 92620 }, { "epoch": 1.62625748345301, "grad_norm": 0.08720968253128636, "learning_rate": 0.00015626564346378258, "loss": 0.8501, "step": 92630 }, { "epoch": 1.6264330483330114, "grad_norm": 0.05658301616063183, "learning_rate": 0.0001562563974611042, "loss": 0.8462, "step": 92640 }, { "epoch": 1.6266086132130129, "grad_norm": 0.09706837569948921, "learning_rate": 0.00015624715075831662, "loss": 0.8428, "step": 92650 }, { "epoch": 1.6267841780930143, "grad_norm": 0.057333635734844596, "learning_rate": 0.0001562379033555371, "loss": 0.8496, "step": 92660 }, { "epoch": 1.6269597429730158, "grad_norm": 0.07594648278293901, "learning_rate": 0.00015622865525288272, "loss": 0.8452, "step": 92670 }, { "epoch": 1.627135307853017, "grad_norm": 0.06458515158404665, "learning_rate": 0.0001562194064504707, "loss": 0.8465, "step": 92680 }, { "epoch": 1.6273108727330183, "grad_norm": 0.06427933638924332, "learning_rate": 0.00015621015694841823, "loss": 0.8425, "step": 92690 }, { "epoch": 1.6274864376130198, "grad_norm": 0.07018146426506693, "learning_rate": 0.00015620090674684246, "loss": 0.8494, "step": 92700 }, { "epoch": 1.6276620024930213, "grad_norm": 0.047801818249598976, "learning_rate": 0.00015619165584586066, "loss": 0.8466, "step": 92710 }, { "epoch": 1.6278375673730228, "grad_norm": 0.08840509540208691, "learning_rate": 0.00015618240424558995, "loss": 0.8465, "step": 92720 }, { "epoch": 1.6280131322530242, "grad_norm": 0.056129550395446495, "learning_rate": 0.00015617315194614768, "loss": 0.8491, "step": 92730 }, { "epoch": 1.6281886971330255, "grad_norm": 0.05991943635206426, "learning_rate": 0.00015616389894765095, "loss": 0.8441, "step": 92740 }, { "epoch": 1.6283642620130268, "grad_norm": 0.06768700756135608, "learning_rate": 0.00015615464525021706, "loss": 0.8542, "step": 92750 }, { "epoch": 1.6285398268930282, "grad_norm": 0.07001517560391825, "learning_rate": 0.0001561453908539632, "loss": 0.8476, "step": 92760 }, { "epoch": 1.6287153917730297, "grad_norm": 0.05902598661337365, "learning_rate": 0.00015613613575900676, "loss": 0.8525, "step": 92770 }, { "epoch": 1.6288909566530312, "grad_norm": 0.048001676880633906, "learning_rate": 0.00015612687996546486, "loss": 0.8489, "step": 92780 }, { "epoch": 1.6290665215330327, "grad_norm": 0.0478673309183264, "learning_rate": 0.00015611762347345483, "loss": 0.85, "step": 92790 }, { "epoch": 1.629242086413034, "grad_norm": 0.06589338471964809, "learning_rate": 0.00015610836628309396, "loss": 0.8441, "step": 92800 }, { "epoch": 1.6294176512930354, "grad_norm": 0.05964627486753238, "learning_rate": 0.0001560991083944995, "loss": 0.8576, "step": 92810 }, { "epoch": 1.6295932161730367, "grad_norm": 0.07594940725536976, "learning_rate": 0.0001560898498077888, "loss": 0.8443, "step": 92820 }, { "epoch": 1.6297687810530381, "grad_norm": 0.05065945871894749, "learning_rate": 0.00015608059052307915, "loss": 0.8432, "step": 92830 }, { "epoch": 1.6299443459330396, "grad_norm": 0.045733954687040446, "learning_rate": 0.00015607133054048783, "loss": 0.8525, "step": 92840 }, { "epoch": 1.630119910813041, "grad_norm": 0.042836778898670415, "learning_rate": 0.00015606206986013224, "loss": 0.855, "step": 92850 }, { "epoch": 1.6302954756930423, "grad_norm": 0.06059820391276282, "learning_rate": 0.00015605280848212966, "loss": 0.8508, "step": 92860 }, { "epoch": 1.6304710405730438, "grad_norm": 0.05527278156083065, "learning_rate": 0.00015604354640659743, "loss": 0.8521, "step": 92870 }, { "epoch": 1.630646605453045, "grad_norm": 0.05280593888780836, "learning_rate": 0.0001560342836336529, "loss": 0.844, "step": 92880 }, { "epoch": 1.6308221703330466, "grad_norm": 0.053537481813829516, "learning_rate": 0.00015602502016341348, "loss": 0.85, "step": 92890 }, { "epoch": 1.630997735213048, "grad_norm": 0.0711011242989417, "learning_rate": 0.0001560157559959965, "loss": 0.838, "step": 92900 }, { "epoch": 1.6311733000930495, "grad_norm": 0.05246343666878841, "learning_rate": 0.00015600649113151936, "loss": 0.8484, "step": 92910 }, { "epoch": 1.6313488649730508, "grad_norm": 0.09514382178138732, "learning_rate": 0.00015599722557009941, "loss": 0.8483, "step": 92920 }, { "epoch": 1.6315244298530522, "grad_norm": 0.060782909387991836, "learning_rate": 0.0001559879593118541, "loss": 0.8446, "step": 92930 }, { "epoch": 1.6316999947330535, "grad_norm": 0.08349018865739723, "learning_rate": 0.00015597869235690082, "loss": 0.8524, "step": 92940 }, { "epoch": 1.631875559613055, "grad_norm": 0.07676068845839809, "learning_rate": 0.00015596942470535693, "loss": 0.8507, "step": 92950 }, { "epoch": 1.6320511244930564, "grad_norm": 0.06490228821647631, "learning_rate": 0.00015596015635733988, "loss": 0.8478, "step": 92960 }, { "epoch": 1.632226689373058, "grad_norm": 0.061407681133224236, "learning_rate": 0.00015595088731296716, "loss": 0.8509, "step": 92970 }, { "epoch": 1.6324022542530592, "grad_norm": 0.05528066562560166, "learning_rate": 0.00015594161757235617, "loss": 0.8503, "step": 92980 }, { "epoch": 1.6325778191330607, "grad_norm": 0.06646586417675106, "learning_rate": 0.00015593234713562432, "loss": 0.8397, "step": 92990 }, { "epoch": 1.632753384013062, "grad_norm": 0.05357567509097248, "learning_rate": 0.00015592307600288913, "loss": 0.8454, "step": 93000 }, { "epoch": 1.6329289488930634, "grad_norm": 0.06800918316989468, "learning_rate": 0.00015591380417426804, "loss": 0.8505, "step": 93010 }, { "epoch": 1.6331045137730649, "grad_norm": 0.05796922906380319, "learning_rate": 0.00015590453164987853, "loss": 0.8535, "step": 93020 }, { "epoch": 1.6332800786530663, "grad_norm": 0.06953662039716438, "learning_rate": 0.00015589525842983805, "loss": 0.848, "step": 93030 }, { "epoch": 1.6334556435330676, "grad_norm": 0.05921364776863275, "learning_rate": 0.00015588598451426414, "loss": 0.8447, "step": 93040 }, { "epoch": 1.633631208413069, "grad_norm": 0.06279521326048645, "learning_rate": 0.0001558767099032743, "loss": 0.8455, "step": 93050 }, { "epoch": 1.6338067732930703, "grad_norm": 0.06335954920015711, "learning_rate": 0.00015586743459698602, "loss": 0.8456, "step": 93060 }, { "epoch": 1.6339823381730718, "grad_norm": 0.07522804216287529, "learning_rate": 0.00015585815859551686, "loss": 0.8507, "step": 93070 }, { "epoch": 1.6341579030530733, "grad_norm": 0.050495621690397285, "learning_rate": 0.00015584888189898428, "loss": 0.8475, "step": 93080 }, { "epoch": 1.6343334679330748, "grad_norm": 0.05904948401838302, "learning_rate": 0.00015583960450750586, "loss": 0.8545, "step": 93090 }, { "epoch": 1.634509032813076, "grad_norm": 0.05453140439933258, "learning_rate": 0.00015583032642119916, "loss": 0.8497, "step": 93100 }, { "epoch": 1.6346845976930775, "grad_norm": 0.06705514986193681, "learning_rate": 0.00015582104764018167, "loss": 0.8461, "step": 93110 }, { "epoch": 1.6348601625730788, "grad_norm": 0.06563125586449993, "learning_rate": 0.00015581176816457107, "loss": 0.853, "step": 93120 }, { "epoch": 1.6350357274530802, "grad_norm": 0.06402283246967556, "learning_rate": 0.00015580248799448477, "loss": 0.845, "step": 93130 }, { "epoch": 1.6352112923330817, "grad_norm": 0.04390886009995161, "learning_rate": 0.0001557932071300405, "loss": 0.8543, "step": 93140 }, { "epoch": 1.6353868572130832, "grad_norm": 0.10662917787309614, "learning_rate": 0.00015578392557135583, "loss": 0.8448, "step": 93150 }, { "epoch": 1.6355624220930844, "grad_norm": 0.04654190046370772, "learning_rate": 0.00015577464331854824, "loss": 0.8519, "step": 93160 }, { "epoch": 1.635737986973086, "grad_norm": 0.06630467115983776, "learning_rate": 0.0001557653603717355, "loss": 0.8542, "step": 93170 }, { "epoch": 1.6359135518530872, "grad_norm": 0.08799258814637599, "learning_rate": 0.0001557560767310351, "loss": 0.8481, "step": 93180 }, { "epoch": 1.6360891167330887, "grad_norm": 0.05039967452199262, "learning_rate": 0.00015574679239656471, "loss": 0.8536, "step": 93190 }, { "epoch": 1.6362646816130901, "grad_norm": 0.06279902911816634, "learning_rate": 0.00015573750736844202, "loss": 0.8513, "step": 93200 }, { "epoch": 1.6364402464930916, "grad_norm": 0.06017277823471987, "learning_rate": 0.00015572822164678455, "loss": 0.8484, "step": 93210 }, { "epoch": 1.636615811373093, "grad_norm": 0.05842352388928956, "learning_rate": 0.00015571893523171008, "loss": 0.8442, "step": 93220 }, { "epoch": 1.6367913762530943, "grad_norm": 0.058063863848250075, "learning_rate": 0.0001557096481233362, "loss": 0.8434, "step": 93230 }, { "epoch": 1.6369669411330956, "grad_norm": 0.056090845713304735, "learning_rate": 0.0001557003603217806, "loss": 0.8486, "step": 93240 }, { "epoch": 1.637142506013097, "grad_norm": 0.06565896056368768, "learning_rate": 0.00015569107182716093, "loss": 0.8532, "step": 93250 }, { "epoch": 1.6373180708930986, "grad_norm": 0.05310025301412718, "learning_rate": 0.00015568178263959488, "loss": 0.8398, "step": 93260 }, { "epoch": 1.6374936357731, "grad_norm": 0.053287086216140096, "learning_rate": 0.00015567249275920023, "loss": 0.8482, "step": 93270 }, { "epoch": 1.6376692006531015, "grad_norm": 0.07995294826187803, "learning_rate": 0.00015566320218609458, "loss": 0.8455, "step": 93280 }, { "epoch": 1.6378447655331028, "grad_norm": 0.06031950862694152, "learning_rate": 0.0001556539109203957, "loss": 0.8503, "step": 93290 }, { "epoch": 1.638020330413104, "grad_norm": 0.06577506622315833, "learning_rate": 0.00015564461896222127, "loss": 0.8432, "step": 93300 }, { "epoch": 1.6381958952931055, "grad_norm": 0.06687395733221028, "learning_rate": 0.00015563532631168904, "loss": 0.8517, "step": 93310 }, { "epoch": 1.638371460173107, "grad_norm": 0.06785014369017232, "learning_rate": 0.00015562603296891678, "loss": 0.8503, "step": 93320 }, { "epoch": 1.6385470250531085, "grad_norm": 0.0531955730099773, "learning_rate": 0.00015561673893402217, "loss": 0.8429, "step": 93330 }, { "epoch": 1.63872258993311, "grad_norm": 0.08283356934745809, "learning_rate": 0.00015560744420712306, "loss": 0.8449, "step": 93340 }, { "epoch": 1.6388981548131112, "grad_norm": 0.061719933800836534, "learning_rate": 0.00015559814878833716, "loss": 0.8496, "step": 93350 }, { "epoch": 1.6390737196931124, "grad_norm": 0.04819686682550963, "learning_rate": 0.0001555888526777822, "loss": 0.8394, "step": 93360 }, { "epoch": 1.639249284573114, "grad_norm": 0.049265222046182666, "learning_rate": 0.0001555795558755761, "loss": 0.8429, "step": 93370 }, { "epoch": 1.6394248494531154, "grad_norm": 0.05110838008035661, "learning_rate": 0.0001555702583818365, "loss": 0.8434, "step": 93380 }, { "epoch": 1.6396004143331169, "grad_norm": 0.05464654086886538, "learning_rate": 0.0001555609601966813, "loss": 0.8458, "step": 93390 }, { "epoch": 1.6397759792131184, "grad_norm": 0.04438792208593837, "learning_rate": 0.00015555166132022826, "loss": 0.8453, "step": 93400 }, { "epoch": 1.6399515440931196, "grad_norm": 0.08005731176164475, "learning_rate": 0.0001555423617525952, "loss": 0.856, "step": 93410 }, { "epoch": 1.6401271089731209, "grad_norm": 0.053448918489175615, "learning_rate": 0.00015553306149390002, "loss": 0.8435, "step": 93420 }, { "epoch": 1.6403026738531223, "grad_norm": 0.04861196482139708, "learning_rate": 0.00015552376054426043, "loss": 0.85, "step": 93430 }, { "epoch": 1.6404782387331238, "grad_norm": 0.05791412061152611, "learning_rate": 0.0001555144589037944, "loss": 0.8463, "step": 93440 }, { "epoch": 1.6406538036131253, "grad_norm": 0.09148201984048321, "learning_rate": 0.0001555051565726197, "loss": 0.8504, "step": 93450 }, { "epoch": 1.6408293684931268, "grad_norm": 0.05559436393345338, "learning_rate": 0.00015549585355085422, "loss": 0.8522, "step": 93460 }, { "epoch": 1.641004933373128, "grad_norm": 0.06479530829217506, "learning_rate": 0.00015548654983861584, "loss": 0.8452, "step": 93470 }, { "epoch": 1.6411804982531293, "grad_norm": 0.054691204195973876, "learning_rate": 0.00015547724543602245, "loss": 0.8432, "step": 93480 }, { "epoch": 1.6413560631331308, "grad_norm": 0.07430663417027324, "learning_rate": 0.0001554679403431919, "loss": 0.8454, "step": 93490 }, { "epoch": 1.6415316280131322, "grad_norm": 0.04644353638181911, "learning_rate": 0.0001554586345602421, "loss": 0.8484, "step": 93500 }, { "epoch": 1.6417071928931337, "grad_norm": 0.059197014048850756, "learning_rate": 0.000155449328087291, "loss": 0.8482, "step": 93510 }, { "epoch": 1.6418827577731352, "grad_norm": 0.06211160599815429, "learning_rate": 0.0001554400209244564, "loss": 0.8418, "step": 93520 }, { "epoch": 1.6420583226531364, "grad_norm": 0.07581239298340244, "learning_rate": 0.00015543071307185638, "loss": 0.8468, "step": 93530 }, { "epoch": 1.642233887533138, "grad_norm": 0.0664447821428876, "learning_rate": 0.00015542140452960877, "loss": 0.8433, "step": 93540 }, { "epoch": 1.6424094524131392, "grad_norm": 0.06474086576010334, "learning_rate": 0.0001554120952978315, "loss": 0.8511, "step": 93550 }, { "epoch": 1.6425850172931407, "grad_norm": 0.04850172578644998, "learning_rate": 0.0001554027853766426, "loss": 0.8497, "step": 93560 }, { "epoch": 1.6427605821731421, "grad_norm": 0.0617262399023797, "learning_rate": 0.00015539347476616, "loss": 0.8429, "step": 93570 }, { "epoch": 1.6429361470531436, "grad_norm": 0.05570664650435086, "learning_rate": 0.0001553841634665016, "loss": 0.8517, "step": 93580 }, { "epoch": 1.6431117119331449, "grad_norm": 0.06196171108592767, "learning_rate": 0.00015537485147778542, "loss": 0.851, "step": 93590 }, { "epoch": 1.6432872768131463, "grad_norm": 0.04555415117647718, "learning_rate": 0.00015536553880012949, "loss": 0.8511, "step": 93600 }, { "epoch": 1.6434628416931476, "grad_norm": 0.05795995480714154, "learning_rate": 0.00015535622543365175, "loss": 0.8452, "step": 93610 }, { "epoch": 1.643638406573149, "grad_norm": 0.04096322037153918, "learning_rate": 0.00015534691137847022, "loss": 0.8469, "step": 93620 }, { "epoch": 1.6438139714531506, "grad_norm": 0.052163042130873555, "learning_rate": 0.0001553375966347029, "loss": 0.8509, "step": 93630 }, { "epoch": 1.643989536333152, "grad_norm": 0.06383318608177115, "learning_rate": 0.00015532828120246782, "loss": 0.8381, "step": 93640 }, { "epoch": 1.6441651012131533, "grad_norm": 0.0647405964220709, "learning_rate": 0.000155318965081883, "loss": 0.8505, "step": 93650 }, { "epoch": 1.6443406660931548, "grad_norm": 0.05256998130176413, "learning_rate": 0.00015530964827306648, "loss": 0.8454, "step": 93660 }, { "epoch": 1.644516230973156, "grad_norm": 0.07846188712848794, "learning_rate": 0.00015530033077613633, "loss": 0.8494, "step": 93670 }, { "epoch": 1.6446917958531575, "grad_norm": 0.06589822488816825, "learning_rate": 0.00015529101259121057, "loss": 0.8421, "step": 93680 }, { "epoch": 1.644867360733159, "grad_norm": 0.05689007016667135, "learning_rate": 0.00015528169371840727, "loss": 0.8471, "step": 93690 }, { "epoch": 1.6450429256131605, "grad_norm": 0.052201534093170285, "learning_rate": 0.0001552723741578445, "loss": 0.8449, "step": 93700 }, { "epoch": 1.6452184904931617, "grad_norm": 0.05236402395271451, "learning_rate": 0.00015526305390964036, "loss": 0.8493, "step": 93710 }, { "epoch": 1.6453940553731632, "grad_norm": 0.05183680664718421, "learning_rate": 0.00015525373297391293, "loss": 0.8392, "step": 93720 }, { "epoch": 1.6455696202531644, "grad_norm": 0.07425804658727077, "learning_rate": 0.00015524441135078028, "loss": 0.8501, "step": 93730 }, { "epoch": 1.645745185133166, "grad_norm": 0.04579067205453503, "learning_rate": 0.00015523508904036057, "loss": 0.8584, "step": 93740 }, { "epoch": 1.6459207500131674, "grad_norm": 0.06107426372735321, "learning_rate": 0.0001552257660427719, "loss": 0.8437, "step": 93750 }, { "epoch": 1.6460963148931689, "grad_norm": 0.05544553027530088, "learning_rate": 0.00015521644235813234, "loss": 0.8524, "step": 93760 }, { "epoch": 1.6462718797731701, "grad_norm": 0.060843913468603426, "learning_rate": 0.0001552071179865601, "loss": 0.8451, "step": 93770 }, { "epoch": 1.6464474446531716, "grad_norm": 0.059856282129230325, "learning_rate": 0.0001551977929281733, "loss": 0.8479, "step": 93780 }, { "epoch": 1.6466230095331729, "grad_norm": 0.07649819939639745, "learning_rate": 0.00015518846718309004, "loss": 0.8533, "step": 93790 }, { "epoch": 1.6467985744131743, "grad_norm": 0.06440625372464633, "learning_rate": 0.00015517914075142852, "loss": 0.8538, "step": 93800 }, { "epoch": 1.6469741392931758, "grad_norm": 0.05001188564018746, "learning_rate": 0.00015516981363330692, "loss": 0.8604, "step": 93810 }, { "epoch": 1.6471497041731773, "grad_norm": 0.07793289713735553, "learning_rate": 0.00015516048582884338, "loss": 0.8502, "step": 93820 }, { "epoch": 1.6473252690531786, "grad_norm": 0.05307295104968642, "learning_rate": 0.00015515115733815614, "loss": 0.8502, "step": 93830 }, { "epoch": 1.64750083393318, "grad_norm": 0.07381462648311013, "learning_rate": 0.00015514182816136332, "loss": 0.8482, "step": 93840 }, { "epoch": 1.6476763988131813, "grad_norm": 0.08417456990063926, "learning_rate": 0.00015513249829858316, "loss": 0.848, "step": 93850 }, { "epoch": 1.6478519636931828, "grad_norm": 0.060698290657091605, "learning_rate": 0.00015512316774993388, "loss": 0.8494, "step": 93860 }, { "epoch": 1.6480275285731842, "grad_norm": 0.051960754627348706, "learning_rate": 0.0001551138365155337, "loss": 0.8485, "step": 93870 }, { "epoch": 1.6482030934531857, "grad_norm": 0.07414041524745645, "learning_rate": 0.00015510450459550087, "loss": 0.8539, "step": 93880 }, { "epoch": 1.648378658333187, "grad_norm": 0.06263852941481925, "learning_rate": 0.00015509517198995353, "loss": 0.8516, "step": 93890 }, { "epoch": 1.6485542232131885, "grad_norm": 0.06258250408379215, "learning_rate": 0.00015508583869901005, "loss": 0.8532, "step": 93900 }, { "epoch": 1.6487297880931897, "grad_norm": 0.062179561793404114, "learning_rate": 0.00015507650472278858, "loss": 0.8512, "step": 93910 }, { "epoch": 1.6489053529731912, "grad_norm": 0.07605777211527548, "learning_rate": 0.0001550671700614075, "loss": 0.8439, "step": 93920 }, { "epoch": 1.6490809178531927, "grad_norm": 0.05049319573563785, "learning_rate": 0.000155057834714985, "loss": 0.8496, "step": 93930 }, { "epoch": 1.6492564827331941, "grad_norm": 0.05319168856763329, "learning_rate": 0.00015504849868363927, "loss": 0.8407, "step": 93940 }, { "epoch": 1.6494320476131956, "grad_norm": 0.06390376083304793, "learning_rate": 0.0001550391619674888, "loss": 0.845, "step": 93950 }, { "epoch": 1.6496076124931969, "grad_norm": 0.07940219400139815, "learning_rate": 0.00015502982456665177, "loss": 0.8412, "step": 93960 }, { "epoch": 1.6497831773731981, "grad_norm": 0.07643779432546992, "learning_rate": 0.00015502048648124655, "loss": 0.8488, "step": 93970 }, { "epoch": 1.6499587422531996, "grad_norm": 0.04814489940064188, "learning_rate": 0.00015501114771139133, "loss": 0.8423, "step": 93980 }, { "epoch": 1.650134307133201, "grad_norm": 0.05757350434904797, "learning_rate": 0.0001550018082572046, "loss": 0.8477, "step": 93990 }, { "epoch": 1.6503098720132026, "grad_norm": 0.06305465977453316, "learning_rate": 0.00015499246811880458, "loss": 0.8394, "step": 94000 }, { "epoch": 1.650485436893204, "grad_norm": 0.07196151745045651, "learning_rate": 0.00015498312729630962, "loss": 0.8429, "step": 94010 }, { "epoch": 1.6506610017732053, "grad_norm": 0.09394280001404458, "learning_rate": 0.0001549737857898381, "loss": 0.8536, "step": 94020 }, { "epoch": 1.6508365666532065, "grad_norm": 0.05935638606684838, "learning_rate": 0.00015496444359950834, "loss": 0.8501, "step": 94030 }, { "epoch": 1.651012131533208, "grad_norm": 0.06308258834723679, "learning_rate": 0.00015495510072543878, "loss": 0.8477, "step": 94040 }, { "epoch": 1.6511876964132095, "grad_norm": 0.08647834662797356, "learning_rate": 0.00015494575716774775, "loss": 0.8441, "step": 94050 }, { "epoch": 1.651363261293211, "grad_norm": 0.048104074588721006, "learning_rate": 0.00015493641292655362, "loss": 0.8494, "step": 94060 }, { "epoch": 1.6515388261732125, "grad_norm": 0.05554638306732061, "learning_rate": 0.00015492706800197484, "loss": 0.8518, "step": 94070 }, { "epoch": 1.6517143910532137, "grad_norm": 0.059080115927504874, "learning_rate": 0.00015491772239412973, "loss": 0.8491, "step": 94080 }, { "epoch": 1.651889955933215, "grad_norm": 0.06895319034438686, "learning_rate": 0.00015490837610313677, "loss": 0.8504, "step": 94090 }, { "epoch": 1.6520655208132164, "grad_norm": 0.05458873426914941, "learning_rate": 0.00015489902912911433, "loss": 0.8486, "step": 94100 }, { "epoch": 1.652241085693218, "grad_norm": 0.05560483251210775, "learning_rate": 0.0001548896814721809, "loss": 0.8488, "step": 94110 }, { "epoch": 1.6524166505732194, "grad_norm": 0.048295533471829764, "learning_rate": 0.00015488033313245484, "loss": 0.8511, "step": 94120 }, { "epoch": 1.6525922154532209, "grad_norm": 0.06449020064957284, "learning_rate": 0.00015487098411005464, "loss": 0.8448, "step": 94130 }, { "epoch": 1.6527677803332221, "grad_norm": 0.0644914040603378, "learning_rate": 0.00015486163440509875, "loss": 0.8432, "step": 94140 }, { "epoch": 1.6529433452132234, "grad_norm": 0.06695852403175741, "learning_rate": 0.00015485228401770565, "loss": 0.841, "step": 94150 }, { "epoch": 1.6531189100932249, "grad_norm": 0.060649813792337824, "learning_rate": 0.00015484293294799377, "loss": 0.8433, "step": 94160 }, { "epoch": 1.6532944749732263, "grad_norm": 0.05154235776261945, "learning_rate": 0.0001548335811960816, "loss": 0.8485, "step": 94170 }, { "epoch": 1.6534700398532278, "grad_norm": 0.0662789750813923, "learning_rate": 0.00015482422876208764, "loss": 0.8526, "step": 94180 }, { "epoch": 1.6536456047332293, "grad_norm": 0.06637706936509075, "learning_rate": 0.00015481487564613045, "loss": 0.8474, "step": 94190 }, { "epoch": 1.6538211696132306, "grad_norm": 0.05486441454981343, "learning_rate": 0.0001548055218483284, "loss": 0.8444, "step": 94200 }, { "epoch": 1.6539967344932318, "grad_norm": 0.0585510457124072, "learning_rate": 0.00015479616736880007, "loss": 0.8522, "step": 94210 }, { "epoch": 1.6541722993732333, "grad_norm": 0.06860656321016921, "learning_rate": 0.00015478681220766404, "loss": 0.846, "step": 94220 }, { "epoch": 1.6543478642532348, "grad_norm": 0.06079191009960637, "learning_rate": 0.00015477745636503876, "loss": 0.8491, "step": 94230 }, { "epoch": 1.6545234291332362, "grad_norm": 0.09852596808255608, "learning_rate": 0.0001547680998410428, "loss": 0.8491, "step": 94240 }, { "epoch": 1.6546989940132377, "grad_norm": 0.07640035708138483, "learning_rate": 0.00015475874263579472, "loss": 0.8478, "step": 94250 }, { "epoch": 1.654874558893239, "grad_norm": 0.05733989744372962, "learning_rate": 0.00015474938474941308, "loss": 0.8518, "step": 94260 }, { "epoch": 1.6550501237732405, "grad_norm": 0.061768454165044215, "learning_rate": 0.0001547400261820164, "loss": 0.8468, "step": 94270 }, { "epoch": 1.6552256886532417, "grad_norm": 0.04913559889787357, "learning_rate": 0.00015473066693372334, "loss": 0.8482, "step": 94280 }, { "epoch": 1.6554012535332432, "grad_norm": 0.050182701872418564, "learning_rate": 0.00015472130700465238, "loss": 0.8512, "step": 94290 }, { "epoch": 1.6555768184132447, "grad_norm": 0.049712644347393184, "learning_rate": 0.0001547119463949222, "loss": 0.8515, "step": 94300 }, { "epoch": 1.6557523832932461, "grad_norm": 0.052355285993523404, "learning_rate": 0.00015470258510465136, "loss": 0.838, "step": 94310 }, { "epoch": 1.6559279481732474, "grad_norm": 0.06082019438078716, "learning_rate": 0.00015469322313395846, "loss": 0.8584, "step": 94320 }, { "epoch": 1.6561035130532489, "grad_norm": 0.06196627713563566, "learning_rate": 0.00015468386048296212, "loss": 0.8416, "step": 94330 }, { "epoch": 1.6562790779332501, "grad_norm": 0.051304798110873816, "learning_rate": 0.00015467449715178105, "loss": 0.8405, "step": 94340 }, { "epoch": 1.6564546428132516, "grad_norm": 0.07634714383886805, "learning_rate": 0.00015466513314053374, "loss": 0.8432, "step": 94350 }, { "epoch": 1.656630207693253, "grad_norm": 0.05902483665988435, "learning_rate": 0.00015465576844933893, "loss": 0.8464, "step": 94360 }, { "epoch": 1.6568057725732546, "grad_norm": 0.06203872666027397, "learning_rate": 0.00015464640307831526, "loss": 0.8488, "step": 94370 }, { "epoch": 1.6569813374532558, "grad_norm": 0.05083904660498888, "learning_rate": 0.00015463703702758136, "loss": 0.8501, "step": 94380 }, { "epoch": 1.6571569023332573, "grad_norm": 0.06845376288983532, "learning_rate": 0.00015462767029725597, "loss": 0.8392, "step": 94390 }, { "epoch": 1.6573324672132586, "grad_norm": 0.07025925724014305, "learning_rate": 0.00015461830288745766, "loss": 0.8533, "step": 94400 }, { "epoch": 1.65750803209326, "grad_norm": 0.051028909821571905, "learning_rate": 0.0001546089347983052, "loss": 0.846, "step": 94410 }, { "epoch": 1.6576835969732615, "grad_norm": 0.05551652465323953, "learning_rate": 0.00015459956602991727, "loss": 0.854, "step": 94420 }, { "epoch": 1.657859161853263, "grad_norm": 0.059167172109166125, "learning_rate": 0.0001545901965824126, "loss": 0.8437, "step": 94430 }, { "epoch": 1.6580347267332642, "grad_norm": 0.07247794241456089, "learning_rate": 0.00015458082645590979, "loss": 0.8441, "step": 94440 }, { "epoch": 1.6582102916132657, "grad_norm": 0.052087652191734564, "learning_rate": 0.00015457145565052767, "loss": 0.8463, "step": 94450 }, { "epoch": 1.658385856493267, "grad_norm": 0.05552839503503336, "learning_rate": 0.00015456208416638495, "loss": 0.8465, "step": 94460 }, { "epoch": 1.6585614213732685, "grad_norm": 0.07350336977256625, "learning_rate": 0.00015455271200360039, "loss": 0.8498, "step": 94470 }, { "epoch": 1.65873698625327, "grad_norm": 0.0778836169498377, "learning_rate": 0.00015454333916229264, "loss": 0.8531, "step": 94480 }, { "epoch": 1.6589125511332714, "grad_norm": 0.046910876804091124, "learning_rate": 0.00015453396564258057, "loss": 0.849, "step": 94490 }, { "epoch": 1.6590881160132727, "grad_norm": 0.049343776155634116, "learning_rate": 0.00015452459144458288, "loss": 0.8458, "step": 94500 }, { "epoch": 1.6592636808932741, "grad_norm": 0.0640951212401291, "learning_rate": 0.00015451521656841837, "loss": 0.8481, "step": 94510 }, { "epoch": 1.6594392457732754, "grad_norm": 0.0661579133645821, "learning_rate": 0.0001545058410142058, "loss": 0.8484, "step": 94520 }, { "epoch": 1.6596148106532769, "grad_norm": 0.058245136879074184, "learning_rate": 0.00015449646478206397, "loss": 0.8506, "step": 94530 }, { "epoch": 1.6597903755332784, "grad_norm": 0.0851085771278295, "learning_rate": 0.00015448708787211166, "loss": 0.8472, "step": 94540 }, { "epoch": 1.6599659404132798, "grad_norm": 0.08560657758088117, "learning_rate": 0.00015447771028446772, "loss": 0.8473, "step": 94550 }, { "epoch": 1.660141505293281, "grad_norm": 0.04913711471625984, "learning_rate": 0.00015446833201925093, "loss": 0.85, "step": 94560 }, { "epoch": 1.6603170701732826, "grad_norm": 0.058415037585008746, "learning_rate": 0.00015445895307658014, "loss": 0.8525, "step": 94570 }, { "epoch": 1.6604926350532838, "grad_norm": 0.06051588690920928, "learning_rate": 0.00015444957345657416, "loss": 0.8434, "step": 94580 }, { "epoch": 1.6606681999332853, "grad_norm": 0.06502999798943816, "learning_rate": 0.00015444019315935184, "loss": 0.8472, "step": 94590 }, { "epoch": 1.6608437648132868, "grad_norm": 0.06547394639168876, "learning_rate": 0.000154430812185032, "loss": 0.8483, "step": 94600 }, { "epoch": 1.6610193296932882, "grad_norm": 0.06040695763740297, "learning_rate": 0.0001544214305337336, "loss": 0.8437, "step": 94610 }, { "epoch": 1.6611948945732895, "grad_norm": 0.047260642966078285, "learning_rate": 0.0001544120482055754, "loss": 0.8474, "step": 94620 }, { "epoch": 1.661370459453291, "grad_norm": 0.06451493923212984, "learning_rate": 0.0001544026652006763, "loss": 0.8557, "step": 94630 }, { "epoch": 1.6615460243332922, "grad_norm": 0.06449058626848733, "learning_rate": 0.00015439328151915522, "loss": 0.8496, "step": 94640 }, { "epoch": 1.6617215892132937, "grad_norm": 0.0505237372961502, "learning_rate": 0.000154383897161131, "loss": 0.8447, "step": 94650 }, { "epoch": 1.6618971540932952, "grad_norm": 0.04730810971482457, "learning_rate": 0.00015437451212672258, "loss": 0.8496, "step": 94660 }, { "epoch": 1.6620727189732967, "grad_norm": 0.054795461713024074, "learning_rate": 0.00015436512641604888, "loss": 0.8513, "step": 94670 }, { "epoch": 1.6622482838532981, "grad_norm": 0.057114086931881365, "learning_rate": 0.00015435574002922882, "loss": 0.8467, "step": 94680 }, { "epoch": 1.6624238487332994, "grad_norm": 0.09154235157838572, "learning_rate": 0.00015434635296638128, "loss": 0.8476, "step": 94690 }, { "epoch": 1.6625994136133007, "grad_norm": 0.04729752760629243, "learning_rate": 0.0001543369652276252, "loss": 0.843, "step": 94700 }, { "epoch": 1.6627749784933021, "grad_norm": 0.06743616115739447, "learning_rate": 0.0001543275768130796, "loss": 0.8484, "step": 94710 }, { "epoch": 1.6629505433733036, "grad_norm": 0.06486637125207566, "learning_rate": 0.00015431818772286335, "loss": 0.841, "step": 94720 }, { "epoch": 1.663126108253305, "grad_norm": 0.06785441879303486, "learning_rate": 0.00015430879795709542, "loss": 0.8497, "step": 94730 }, { "epoch": 1.6633016731333066, "grad_norm": 0.060553837762360035, "learning_rate": 0.00015429940751589486, "loss": 0.8424, "step": 94740 }, { "epoch": 1.6634772380133078, "grad_norm": 0.05016028787067897, "learning_rate": 0.00015429001639938057, "loss": 0.8412, "step": 94750 }, { "epoch": 1.663652802893309, "grad_norm": 0.05855630634702015, "learning_rate": 0.00015428062460767154, "loss": 0.8428, "step": 94760 }, { "epoch": 1.6638283677733106, "grad_norm": 0.04856941013046328, "learning_rate": 0.00015427123214088682, "loss": 0.8504, "step": 94770 }, { "epoch": 1.664003932653312, "grad_norm": 0.056211501000091715, "learning_rate": 0.00015426183899914535, "loss": 0.8399, "step": 94780 }, { "epoch": 1.6641794975333135, "grad_norm": 0.048404433527417384, "learning_rate": 0.0001542524451825662, "loss": 0.8411, "step": 94790 }, { "epoch": 1.664355062413315, "grad_norm": 0.053438917253350426, "learning_rate": 0.00015424305069126836, "loss": 0.8409, "step": 94800 }, { "epoch": 1.6645306272933162, "grad_norm": 0.05591890080352559, "learning_rate": 0.00015423365552537084, "loss": 0.8562, "step": 94810 }, { "epoch": 1.6647061921733175, "grad_norm": 0.051064000941507884, "learning_rate": 0.0001542242596849927, "loss": 0.8409, "step": 94820 }, { "epoch": 1.664881757053319, "grad_norm": 0.06567919418937265, "learning_rate": 0.000154214863170253, "loss": 0.8486, "step": 94830 }, { "epoch": 1.6650573219333205, "grad_norm": 0.07367443657191486, "learning_rate": 0.00015420546598127077, "loss": 0.8505, "step": 94840 }, { "epoch": 1.665232886813322, "grad_norm": 0.05483704316121613, "learning_rate": 0.0001541960681181651, "loss": 0.85, "step": 94850 }, { "epoch": 1.6654084516933234, "grad_norm": 0.049390431039181774, "learning_rate": 0.00015418666958105508, "loss": 0.8481, "step": 94860 }, { "epoch": 1.6655840165733247, "grad_norm": 0.054306688278250144, "learning_rate": 0.00015417727037005973, "loss": 0.8514, "step": 94870 }, { "epoch": 1.665759581453326, "grad_norm": 0.05981539102518558, "learning_rate": 0.00015416787048529816, "loss": 0.8494, "step": 94880 }, { "epoch": 1.6659351463333274, "grad_norm": 0.04216374937669482, "learning_rate": 0.0001541584699268895, "loss": 0.8429, "step": 94890 }, { "epoch": 1.6661107112133289, "grad_norm": 0.046192287740402536, "learning_rate": 0.00015414906869495287, "loss": 0.8532, "step": 94900 }, { "epoch": 1.6662862760933304, "grad_norm": 0.07041764951375896, "learning_rate": 0.00015413966678960733, "loss": 0.8448, "step": 94910 }, { "epoch": 1.6664618409733318, "grad_norm": 0.07588508886355223, "learning_rate": 0.00015413026421097202, "loss": 0.8536, "step": 94920 }, { "epoch": 1.666637405853333, "grad_norm": 0.04860613827140448, "learning_rate": 0.00015412086095916607, "loss": 0.8531, "step": 94930 }, { "epoch": 1.6668129707333343, "grad_norm": 0.0412051524729264, "learning_rate": 0.00015411145703430866, "loss": 0.8438, "step": 94940 }, { "epoch": 1.6669885356133358, "grad_norm": 0.0633930385704807, "learning_rate": 0.0001541020524365189, "loss": 0.846, "step": 94950 }, { "epoch": 1.6671641004933373, "grad_norm": 0.07066803017219622, "learning_rate": 0.00015409264716591595, "loss": 0.8474, "step": 94960 }, { "epoch": 1.6673396653733388, "grad_norm": 0.05713335731903404, "learning_rate": 0.000154083241222619, "loss": 0.8505, "step": 94970 }, { "epoch": 1.6675152302533403, "grad_norm": 0.042073746343784524, "learning_rate": 0.00015407383460674717, "loss": 0.8576, "step": 94980 }, { "epoch": 1.6676907951333415, "grad_norm": 0.06370565942288797, "learning_rate": 0.00015406442731841972, "loss": 0.8445, "step": 94990 }, { "epoch": 1.667866360013343, "grad_norm": 0.05512551013490039, "learning_rate": 0.00015405501935775582, "loss": 0.8489, "step": 95000 }, { "epoch": 1.6680419248933442, "grad_norm": 0.07430079858041694, "learning_rate": 0.00015404561072487462, "loss": 0.8487, "step": 95010 }, { "epoch": 1.6682174897733457, "grad_norm": 0.06704008878450783, "learning_rate": 0.00015403620141989538, "loss": 0.8456, "step": 95020 }, { "epoch": 1.6683930546533472, "grad_norm": 0.05088199585829153, "learning_rate": 0.00015402679144293732, "loss": 0.841, "step": 95030 }, { "epoch": 1.6685686195333487, "grad_norm": 0.07355314004863245, "learning_rate": 0.00015401738079411965, "loss": 0.8449, "step": 95040 }, { "epoch": 1.66874418441335, "grad_norm": 0.06275682817240719, "learning_rate": 0.00015400796947356155, "loss": 0.8408, "step": 95050 }, { "epoch": 1.6689197492933514, "grad_norm": 0.059573583224777196, "learning_rate": 0.00015399855748138237, "loss": 0.8518, "step": 95060 }, { "epoch": 1.6690953141733527, "grad_norm": 0.0506518320831434, "learning_rate": 0.0001539891448177013, "loss": 0.8506, "step": 95070 }, { "epoch": 1.6692708790533541, "grad_norm": 0.06426923993348818, "learning_rate": 0.00015397973148263762, "loss": 0.853, "step": 95080 }, { "epoch": 1.6694464439333556, "grad_norm": 0.05071102500148647, "learning_rate": 0.00015397031747631059, "loss": 0.8596, "step": 95090 }, { "epoch": 1.669622008813357, "grad_norm": 0.04914567003463909, "learning_rate": 0.00015396090279883948, "loss": 0.8417, "step": 95100 }, { "epoch": 1.6697975736933584, "grad_norm": 0.058386213073507505, "learning_rate": 0.00015395148745034356, "loss": 0.8382, "step": 95110 }, { "epoch": 1.6699731385733598, "grad_norm": 0.06491351858113957, "learning_rate": 0.00015394207143094217, "loss": 0.8511, "step": 95120 }, { "epoch": 1.670148703453361, "grad_norm": 0.06224113794546137, "learning_rate": 0.00015393265474075455, "loss": 0.8473, "step": 95130 }, { "epoch": 1.6703242683333626, "grad_norm": 0.0585562309824553, "learning_rate": 0.00015392323737990012, "loss": 0.8479, "step": 95140 }, { "epoch": 1.670499833213364, "grad_norm": 0.05429424706494633, "learning_rate": 0.00015391381934849805, "loss": 0.8404, "step": 95150 }, { "epoch": 1.6706753980933655, "grad_norm": 0.06843827875008218, "learning_rate": 0.0001539044006466678, "loss": 0.8495, "step": 95160 }, { "epoch": 1.6708509629733668, "grad_norm": 0.06534873784759902, "learning_rate": 0.00015389498127452863, "loss": 0.8476, "step": 95170 }, { "epoch": 1.6710265278533682, "grad_norm": 0.06606457134853548, "learning_rate": 0.00015388556123219994, "loss": 0.8475, "step": 95180 }, { "epoch": 1.6712020927333695, "grad_norm": 0.06125756569073791, "learning_rate": 0.000153876140519801, "loss": 0.8595, "step": 95190 }, { "epoch": 1.671377657613371, "grad_norm": 0.07053746352002868, "learning_rate": 0.00015386671913745128, "loss": 0.855, "step": 95200 }, { "epoch": 1.6715532224933725, "grad_norm": 0.06120545884346809, "learning_rate": 0.0001538572970852701, "loss": 0.8519, "step": 95210 }, { "epoch": 1.671728787373374, "grad_norm": 0.07838430940555614, "learning_rate": 0.00015384787436337682, "loss": 0.8429, "step": 95220 }, { "epoch": 1.6719043522533752, "grad_norm": 0.061774839789035405, "learning_rate": 0.0001538384509718908, "loss": 0.8394, "step": 95230 }, { "epoch": 1.6720799171333767, "grad_norm": 0.053520478592809305, "learning_rate": 0.00015382902691093154, "loss": 0.8461, "step": 95240 }, { "epoch": 1.672255482013378, "grad_norm": 0.058158817898913535, "learning_rate": 0.00015381960218061836, "loss": 0.8486, "step": 95250 }, { "epoch": 1.6724310468933794, "grad_norm": 0.05923940267591351, "learning_rate": 0.0001538101767810707, "loss": 0.8519, "step": 95260 }, { "epoch": 1.6726066117733809, "grad_norm": 0.04877196405435707, "learning_rate": 0.00015380075071240798, "loss": 0.8466, "step": 95270 }, { "epoch": 1.6727821766533824, "grad_norm": 0.06851786003343383, "learning_rate": 0.00015379132397474963, "loss": 0.851, "step": 95280 }, { "epoch": 1.6729577415333836, "grad_norm": 0.048048576134409215, "learning_rate": 0.00015378189656821507, "loss": 0.8451, "step": 95290 }, { "epoch": 1.673133306413385, "grad_norm": 0.06441100145075235, "learning_rate": 0.00015377246849292378, "loss": 0.8503, "step": 95300 }, { "epoch": 1.6733088712933863, "grad_norm": 0.07299931793311638, "learning_rate": 0.00015376303974899523, "loss": 0.85, "step": 95310 }, { "epoch": 1.6734844361733878, "grad_norm": 0.06843065723408207, "learning_rate": 0.00015375361033654881, "loss": 0.8428, "step": 95320 }, { "epoch": 1.6736600010533893, "grad_norm": 0.061252414512759805, "learning_rate": 0.00015374418025570406, "loss": 0.8474, "step": 95330 }, { "epoch": 1.6738355659333908, "grad_norm": 0.0684872081491556, "learning_rate": 0.0001537347495065804, "loss": 0.8444, "step": 95340 }, { "epoch": 1.674011130813392, "grad_norm": 0.0752867684652765, "learning_rate": 0.00015372531808929737, "loss": 0.8424, "step": 95350 }, { "epoch": 1.6741866956933935, "grad_norm": 0.06798165438116349, "learning_rate": 0.0001537158860039745, "loss": 0.8465, "step": 95360 }, { "epoch": 1.6743622605733948, "grad_norm": 0.06310937524097925, "learning_rate": 0.00015370645325073123, "loss": 0.8461, "step": 95370 }, { "epoch": 1.6745378254533962, "grad_norm": 0.062022582181607604, "learning_rate": 0.0001536970198296871, "loss": 0.8461, "step": 95380 }, { "epoch": 1.6747133903333977, "grad_norm": 0.06219136708838761, "learning_rate": 0.0001536875857409616, "loss": 0.8484, "step": 95390 }, { "epoch": 1.6748889552133992, "grad_norm": 0.07287754470137178, "learning_rate": 0.0001536781509846743, "loss": 0.8498, "step": 95400 }, { "epoch": 1.6750645200934007, "grad_norm": 0.06648327186205415, "learning_rate": 0.00015366871556094473, "loss": 0.8483, "step": 95410 }, { "epoch": 1.675240084973402, "grad_norm": 0.06104804623844486, "learning_rate": 0.00015365927946989246, "loss": 0.855, "step": 95420 }, { "epoch": 1.6754156498534032, "grad_norm": 0.07156987550393536, "learning_rate": 0.00015364984271163705, "loss": 0.8421, "step": 95430 }, { "epoch": 1.6755912147334047, "grad_norm": 0.05330149035287825, "learning_rate": 0.00015364040528629798, "loss": 0.8472, "step": 95440 }, { "epoch": 1.6757667796134061, "grad_norm": 0.07240900563455192, "learning_rate": 0.00015363096719399496, "loss": 0.8462, "step": 95450 }, { "epoch": 1.6759423444934076, "grad_norm": 0.04792724640144473, "learning_rate": 0.00015362152843484742, "loss": 0.8483, "step": 95460 }, { "epoch": 1.676117909373409, "grad_norm": 0.05573833506318755, "learning_rate": 0.00015361208900897507, "loss": 0.8497, "step": 95470 }, { "epoch": 1.6762934742534104, "grad_norm": 0.06940693850166915, "learning_rate": 0.00015360264891649747, "loss": 0.8496, "step": 95480 }, { "epoch": 1.6764690391334116, "grad_norm": 0.05330598500458181, "learning_rate": 0.0001535932081575342, "loss": 0.8535, "step": 95490 }, { "epoch": 1.676644604013413, "grad_norm": 0.06355062326426408, "learning_rate": 0.00015358376673220493, "loss": 0.8496, "step": 95500 }, { "epoch": 1.6768201688934146, "grad_norm": 0.11479060705281197, "learning_rate": 0.00015357432464062926, "loss": 0.8479, "step": 95510 }, { "epoch": 1.676995733773416, "grad_norm": 0.04680207843694712, "learning_rate": 0.00015356488188292681, "loss": 0.8499, "step": 95520 }, { "epoch": 1.6771712986534175, "grad_norm": 0.05439626970373758, "learning_rate": 0.00015355543845921722, "loss": 0.8517, "step": 95530 }, { "epoch": 1.6773468635334188, "grad_norm": 0.05456687250982142, "learning_rate": 0.0001535459943696202, "loss": 0.8546, "step": 95540 }, { "epoch": 1.67752242841342, "grad_norm": 0.053634909926602345, "learning_rate": 0.00015353654961425533, "loss": 0.8517, "step": 95550 }, { "epoch": 1.6776979932934215, "grad_norm": 0.0773923600134467, "learning_rate": 0.00015352710419324231, "loss": 0.8527, "step": 95560 }, { "epoch": 1.677873558173423, "grad_norm": 0.06606235920627306, "learning_rate": 0.00015351765810670077, "loss": 0.8454, "step": 95570 }, { "epoch": 1.6780491230534245, "grad_norm": 0.05446328326585612, "learning_rate": 0.00015350821135475052, "loss": 0.8425, "step": 95580 }, { "epoch": 1.678224687933426, "grad_norm": 0.06313387935048015, "learning_rate": 0.00015349876393751114, "loss": 0.8529, "step": 95590 }, { "epoch": 1.6784002528134272, "grad_norm": 0.08051191038701772, "learning_rate": 0.00015348931585510233, "loss": 0.846, "step": 95600 }, { "epoch": 1.6785758176934285, "grad_norm": 0.05237718658569321, "learning_rate": 0.00015347986710764386, "loss": 0.8515, "step": 95610 }, { "epoch": 1.67875138257343, "grad_norm": 0.05203973058781705, "learning_rate": 0.0001534704176952554, "loss": 0.8462, "step": 95620 }, { "epoch": 1.6789269474534314, "grad_norm": 0.07672918258682997, "learning_rate": 0.00015346096761805668, "loss": 0.8499, "step": 95630 }, { "epoch": 1.6791025123334329, "grad_norm": 0.08622608620343247, "learning_rate": 0.00015345151687616747, "loss": 0.8549, "step": 95640 }, { "epoch": 1.6792780772134344, "grad_norm": 0.07599902932980918, "learning_rate": 0.00015344206546970748, "loss": 0.8494, "step": 95650 }, { "epoch": 1.6794536420934356, "grad_norm": 0.05353369189397644, "learning_rate": 0.00015343261339879644, "loss": 0.8456, "step": 95660 }, { "epoch": 1.6796292069734369, "grad_norm": 0.1123674735386603, "learning_rate": 0.0001534231606635542, "loss": 0.8517, "step": 95670 }, { "epoch": 1.6798047718534383, "grad_norm": 0.058523709188148636, "learning_rate": 0.0001534137072641004, "loss": 0.8535, "step": 95680 }, { "epoch": 1.6799803367334398, "grad_norm": 0.07731497630249619, "learning_rate": 0.0001534042532005549, "loss": 0.8472, "step": 95690 }, { "epoch": 1.6801559016134413, "grad_norm": 0.06224062651485618, "learning_rate": 0.0001533947984730375, "loss": 0.8471, "step": 95700 }, { "epoch": 1.6803314664934428, "grad_norm": 0.09533843131799141, "learning_rate": 0.00015338534308166797, "loss": 0.8445, "step": 95710 }, { "epoch": 1.680507031373444, "grad_norm": 0.06362895128683026, "learning_rate": 0.00015337588702656608, "loss": 0.8527, "step": 95720 }, { "epoch": 1.6806825962534455, "grad_norm": 0.05679390942250731, "learning_rate": 0.00015336643030785166, "loss": 0.8476, "step": 95730 }, { "epoch": 1.6808581611334468, "grad_norm": 0.05673447953000295, "learning_rate": 0.00015335697292564454, "loss": 0.8458, "step": 95740 }, { "epoch": 1.6810337260134482, "grad_norm": 0.05443225561591259, "learning_rate": 0.00015334751488006455, "loss": 0.8438, "step": 95750 }, { "epoch": 1.6812092908934497, "grad_norm": 0.05055958965335517, "learning_rate": 0.0001533380561712315, "loss": 0.8422, "step": 95760 }, { "epoch": 1.6813848557734512, "grad_norm": 0.0678798131184224, "learning_rate": 0.00015332859679926525, "loss": 0.855, "step": 95770 }, { "epoch": 1.6815604206534525, "grad_norm": 0.06453100921263666, "learning_rate": 0.00015331913676428565, "loss": 0.851, "step": 95780 }, { "epoch": 1.681735985533454, "grad_norm": 0.04748940838964168, "learning_rate": 0.00015330967606641256, "loss": 0.854, "step": 95790 }, { "epoch": 1.6819115504134552, "grad_norm": 0.0753851517040027, "learning_rate": 0.00015330021470576585, "loss": 0.8461, "step": 95800 }, { "epoch": 1.6820871152934567, "grad_norm": 0.0911950721169273, "learning_rate": 0.0001532907526824654, "loss": 0.8463, "step": 95810 }, { "epoch": 1.6822626801734581, "grad_norm": 0.07516061435204335, "learning_rate": 0.00015328128999663108, "loss": 0.8538, "step": 95820 }, { "epoch": 1.6824382450534596, "grad_norm": 0.059799068955432445, "learning_rate": 0.0001532718266483828, "loss": 0.851, "step": 95830 }, { "epoch": 1.6826138099334609, "grad_norm": 0.057756081694793404, "learning_rate": 0.00015326236263784045, "loss": 0.8485, "step": 95840 }, { "epoch": 1.6827893748134624, "grad_norm": 0.05902145850088131, "learning_rate": 0.000153252897965124, "loss": 0.8534, "step": 95850 }, { "epoch": 1.6829649396934636, "grad_norm": 0.07342560284232039, "learning_rate": 0.00015324343263035324, "loss": 0.8449, "step": 95860 }, { "epoch": 1.683140504573465, "grad_norm": 0.052068129114703966, "learning_rate": 0.00015323396663364822, "loss": 0.8446, "step": 95870 }, { "epoch": 1.6833160694534666, "grad_norm": 0.058193405558772726, "learning_rate": 0.00015322449997512883, "loss": 0.8388, "step": 95880 }, { "epoch": 1.683491634333468, "grad_norm": 0.049458443507647006, "learning_rate": 0.00015321503265491504, "loss": 0.8509, "step": 95890 }, { "epoch": 1.6836671992134693, "grad_norm": 0.04420036819366715, "learning_rate": 0.00015320556467312673, "loss": 0.8509, "step": 95900 }, { "epoch": 1.6838427640934708, "grad_norm": 0.04675505073036924, "learning_rate": 0.00015319609602988394, "loss": 0.8521, "step": 95910 }, { "epoch": 1.684018328973472, "grad_norm": 0.05789436408873242, "learning_rate": 0.0001531866267253066, "loss": 0.8435, "step": 95920 }, { "epoch": 1.6841938938534735, "grad_norm": 0.05395577530555509, "learning_rate": 0.0001531771567595147, "loss": 0.8523, "step": 95930 }, { "epoch": 1.684369458733475, "grad_norm": 0.07056038849606883, "learning_rate": 0.00015316768613262823, "loss": 0.8434, "step": 95940 }, { "epoch": 1.6845450236134765, "grad_norm": 0.04684354664580664, "learning_rate": 0.00015315821484476717, "loss": 0.8444, "step": 95950 }, { "epoch": 1.6847205884934777, "grad_norm": 0.049010913957714175, "learning_rate": 0.00015314874289605157, "loss": 0.8444, "step": 95960 }, { "epoch": 1.6848961533734792, "grad_norm": 0.04398224562191143, "learning_rate": 0.00015313927028660134, "loss": 0.8612, "step": 95970 }, { "epoch": 1.6850717182534805, "grad_norm": 0.08558575666911754, "learning_rate": 0.00015312979701653657, "loss": 0.8522, "step": 95980 }, { "epoch": 1.685247283133482, "grad_norm": 0.07735970110752302, "learning_rate": 0.00015312032308597735, "loss": 0.8415, "step": 95990 }, { "epoch": 1.6854228480134834, "grad_norm": 0.0646837853224522, "learning_rate": 0.00015311084849504357, "loss": 0.8596, "step": 96000 }, { "epoch": 1.6855984128934849, "grad_norm": 0.058455583545565896, "learning_rate": 0.00015310137324385542, "loss": 0.8446, "step": 96010 }, { "epoch": 1.6857739777734861, "grad_norm": 0.056098470236429394, "learning_rate": 0.00015309189733253282, "loss": 0.8524, "step": 96020 }, { "epoch": 1.6859495426534876, "grad_norm": 0.0476578238272854, "learning_rate": 0.00015308242076119595, "loss": 0.8512, "step": 96030 }, { "epoch": 1.6861251075334889, "grad_norm": 0.05147881136538946, "learning_rate": 0.0001530729435299648, "loss": 0.8487, "step": 96040 }, { "epoch": 1.6863006724134904, "grad_norm": 0.05665635325463302, "learning_rate": 0.0001530634656389595, "loss": 0.8457, "step": 96050 }, { "epoch": 1.6864762372934918, "grad_norm": 0.07736988525648364, "learning_rate": 0.00015305398708830007, "loss": 0.8345, "step": 96060 }, { "epoch": 1.6866518021734933, "grad_norm": 0.056845483251915935, "learning_rate": 0.00015304450787810667, "loss": 0.8478, "step": 96070 }, { "epoch": 1.6868273670534946, "grad_norm": 0.08621970172970536, "learning_rate": 0.00015303502800849937, "loss": 0.8561, "step": 96080 }, { "epoch": 1.687002931933496, "grad_norm": 0.06612637987095707, "learning_rate": 0.00015302554747959832, "loss": 0.8456, "step": 96090 }, { "epoch": 1.6871784968134973, "grad_norm": 0.05269822690879243, "learning_rate": 0.0001530160662915236, "loss": 0.8562, "step": 96100 }, { "epoch": 1.6873540616934988, "grad_norm": 0.05247681482313725, "learning_rate": 0.00015300658444439535, "loss": 0.8535, "step": 96110 }, { "epoch": 1.6875296265735003, "grad_norm": 0.05520266802168007, "learning_rate": 0.0001529971019383337, "loss": 0.8499, "step": 96120 }, { "epoch": 1.6877051914535017, "grad_norm": 0.05079496466075705, "learning_rate": 0.0001529876187734588, "loss": 0.8482, "step": 96130 }, { "epoch": 1.6878807563335032, "grad_norm": 0.1031477037999305, "learning_rate": 0.00015297813494989082, "loss": 0.8481, "step": 96140 }, { "epoch": 1.6880563212135045, "grad_norm": 0.04956608870164817, "learning_rate": 0.00015296865046774988, "loss": 0.8423, "step": 96150 }, { "epoch": 1.6882318860935057, "grad_norm": 0.06597760119517432, "learning_rate": 0.00015295916532715625, "loss": 0.8501, "step": 96160 }, { "epoch": 1.6884074509735072, "grad_norm": 0.08706964395327943, "learning_rate": 0.00015294967952822998, "loss": 0.8443, "step": 96170 }, { "epoch": 1.6885830158535087, "grad_norm": 0.055574154082522496, "learning_rate": 0.00015294019307109132, "loss": 0.8457, "step": 96180 }, { "epoch": 1.6887585807335102, "grad_norm": 0.05817518412213656, "learning_rate": 0.0001529307059558605, "loss": 0.8486, "step": 96190 }, { "epoch": 1.6889341456135116, "grad_norm": 0.05807971053085143, "learning_rate": 0.00015292121818265758, "loss": 0.85, "step": 96200 }, { "epoch": 1.6891097104935129, "grad_norm": 0.07352018798856931, "learning_rate": 0.00015291172975160294, "loss": 0.8486, "step": 96210 }, { "epoch": 1.6892852753735141, "grad_norm": 0.06676868817104249, "learning_rate": 0.00015290224066281675, "loss": 0.8414, "step": 96220 }, { "epoch": 1.6894608402535156, "grad_norm": 0.055505827217964876, "learning_rate": 0.00015289275091641925, "loss": 0.8575, "step": 96230 }, { "epoch": 1.689636405133517, "grad_norm": 0.09653288313157303, "learning_rate": 0.00015288326051253058, "loss": 0.855, "step": 96240 }, { "epoch": 1.6898119700135186, "grad_norm": 0.06005540229185079, "learning_rate": 0.0001528737694512711, "loss": 0.8473, "step": 96250 }, { "epoch": 1.68998753489352, "grad_norm": 0.05445235289326103, "learning_rate": 0.000152864277732761, "loss": 0.8519, "step": 96260 }, { "epoch": 1.6901630997735213, "grad_norm": 0.061782429595853464, "learning_rate": 0.00015285478535712057, "loss": 0.8457, "step": 96270 }, { "epoch": 1.6903386646535226, "grad_norm": 0.0655046105703986, "learning_rate": 0.0001528452923244701, "loss": 0.8509, "step": 96280 }, { "epoch": 1.690514229533524, "grad_norm": 0.06686896823782812, "learning_rate": 0.00015283579863492982, "loss": 0.8429, "step": 96290 }, { "epoch": 1.6906897944135255, "grad_norm": 0.08345840985561094, "learning_rate": 0.00015282630428862002, "loss": 0.846, "step": 96300 }, { "epoch": 1.690865359293527, "grad_norm": 0.05598847692007748, "learning_rate": 0.000152816809285661, "loss": 0.8491, "step": 96310 }, { "epoch": 1.6910409241735285, "grad_norm": 0.05575537931613184, "learning_rate": 0.0001528073136261731, "loss": 0.8512, "step": 96320 }, { "epoch": 1.6912164890535297, "grad_norm": 0.06171842138710155, "learning_rate": 0.00015279781731027662, "loss": 0.8456, "step": 96330 }, { "epoch": 1.691392053933531, "grad_norm": 0.04751745085695837, "learning_rate": 0.00015278832033809183, "loss": 0.8427, "step": 96340 }, { "epoch": 1.6915676188135325, "grad_norm": 0.05778163087762041, "learning_rate": 0.0001527788227097391, "loss": 0.8453, "step": 96350 }, { "epoch": 1.691743183693534, "grad_norm": 0.05416033228935911, "learning_rate": 0.0001527693244253388, "loss": 0.8448, "step": 96360 }, { "epoch": 1.6919187485735354, "grad_norm": 0.04948214536409534, "learning_rate": 0.00015275982548501122, "loss": 0.8516, "step": 96370 }, { "epoch": 1.692094313453537, "grad_norm": 0.05417735215233099, "learning_rate": 0.00015275032588887673, "loss": 0.8399, "step": 96380 }, { "epoch": 1.6922698783335381, "grad_norm": 0.05413940771508463, "learning_rate": 0.0001527408256370557, "loss": 0.8495, "step": 96390 }, { "epoch": 1.6924454432135394, "grad_norm": 0.06796335657124483, "learning_rate": 0.0001527313247296685, "loss": 0.8425, "step": 96400 }, { "epoch": 1.6926210080935409, "grad_norm": 0.04484990299952155, "learning_rate": 0.00015272182316683548, "loss": 0.8401, "step": 96410 }, { "epoch": 1.6927965729735424, "grad_norm": 0.052560842892518, "learning_rate": 0.00015271232094867706, "loss": 0.8517, "step": 96420 }, { "epoch": 1.6929721378535438, "grad_norm": 0.05599336106732716, "learning_rate": 0.0001527028180753136, "loss": 0.8434, "step": 96430 }, { "epoch": 1.6931477027335453, "grad_norm": 0.05842751937618904, "learning_rate": 0.00015269331454686555, "loss": 0.8493, "step": 96440 }, { "epoch": 1.6933232676135466, "grad_norm": 0.060900726305751905, "learning_rate": 0.0001526838103634533, "loss": 0.8446, "step": 96450 }, { "epoch": 1.693498832493548, "grad_norm": 0.0551389171565461, "learning_rate": 0.00015267430552519726, "loss": 0.8573, "step": 96460 }, { "epoch": 1.6936743973735493, "grad_norm": 0.06546728634941128, "learning_rate": 0.00015266480003221786, "loss": 0.8486, "step": 96470 }, { "epoch": 1.6938499622535508, "grad_norm": 0.08209163801877611, "learning_rate": 0.00015265529388463556, "loss": 0.8452, "step": 96480 }, { "epoch": 1.6940255271335523, "grad_norm": 0.05266890950945589, "learning_rate": 0.00015264578708257078, "loss": 0.8433, "step": 96490 }, { "epoch": 1.6942010920135537, "grad_norm": 0.10191373662920002, "learning_rate": 0.00015263627962614398, "loss": 0.8507, "step": 96500 }, { "epoch": 1.694376656893555, "grad_norm": 0.11211153293079006, "learning_rate": 0.00015262677151547558, "loss": 0.8494, "step": 96510 }, { "epoch": 1.6945522217735565, "grad_norm": 0.06401182431473064, "learning_rate": 0.00015261726275068616, "loss": 0.8451, "step": 96520 }, { "epoch": 1.6947277866535577, "grad_norm": 0.046550528789999414, "learning_rate": 0.00015260775333189607, "loss": 0.8421, "step": 96530 }, { "epoch": 1.6949033515335592, "grad_norm": 0.08728222971297829, "learning_rate": 0.00015259824325922585, "loss": 0.8447, "step": 96540 }, { "epoch": 1.6950789164135607, "grad_norm": 0.06498825139418961, "learning_rate": 0.00015258873253279606, "loss": 0.8374, "step": 96550 }, { "epoch": 1.6952544812935622, "grad_norm": 0.06957384071261191, "learning_rate": 0.00015257922115272712, "loss": 0.8452, "step": 96560 }, { "epoch": 1.6954300461735634, "grad_norm": 0.062241826806331606, "learning_rate": 0.00015256970911913957, "loss": 0.8363, "step": 96570 }, { "epoch": 1.6956056110535649, "grad_norm": 0.06415715931939064, "learning_rate": 0.0001525601964321539, "loss": 0.8493, "step": 96580 }, { "epoch": 1.6957811759335661, "grad_norm": 0.045724100671040026, "learning_rate": 0.00015255068309189064, "loss": 0.8571, "step": 96590 }, { "epoch": 1.6959567408135676, "grad_norm": 0.04681718095428107, "learning_rate": 0.00015254116909847038, "loss": 0.8393, "step": 96600 }, { "epoch": 1.696132305693569, "grad_norm": 0.06634265107436811, "learning_rate": 0.00015253165445201363, "loss": 0.8424, "step": 96610 }, { "epoch": 1.6963078705735706, "grad_norm": 0.05459139069820399, "learning_rate": 0.00015252213915264092, "loss": 0.8439, "step": 96620 }, { "epoch": 1.6964834354535718, "grad_norm": 0.0602764804554293, "learning_rate": 0.00015251262320047283, "loss": 0.8535, "step": 96630 }, { "epoch": 1.6966590003335733, "grad_norm": 0.04821388025388649, "learning_rate": 0.00015250310659562996, "loss": 0.8458, "step": 96640 }, { "epoch": 1.6968345652135746, "grad_norm": 0.05228930550179565, "learning_rate": 0.00015249358933823283, "loss": 0.846, "step": 96650 }, { "epoch": 1.697010130093576, "grad_norm": 0.04887150140854072, "learning_rate": 0.00015248407142840208, "loss": 0.8494, "step": 96660 }, { "epoch": 1.6971856949735775, "grad_norm": 0.06473050772818545, "learning_rate": 0.0001524745528662583, "loss": 0.8501, "step": 96670 }, { "epoch": 1.697361259853579, "grad_norm": 0.08292974962165586, "learning_rate": 0.00015246503365192203, "loss": 0.8492, "step": 96680 }, { "epoch": 1.6975368247335803, "grad_norm": 0.054037633892485455, "learning_rate": 0.00015245551378551393, "loss": 0.841, "step": 96690 }, { "epoch": 1.6977123896135817, "grad_norm": 0.06203524927204946, "learning_rate": 0.0001524459932671546, "loss": 0.8565, "step": 96700 }, { "epoch": 1.697887954493583, "grad_norm": 0.052700555943860895, "learning_rate": 0.00015243647209696467, "loss": 0.854, "step": 96710 }, { "epoch": 1.6980635193735845, "grad_norm": 0.05695772613794107, "learning_rate": 0.00015242695027506482, "loss": 0.8507, "step": 96720 }, { "epoch": 1.698239084253586, "grad_norm": 0.0747400954506372, "learning_rate": 0.00015241742780157562, "loss": 0.8442, "step": 96730 }, { "epoch": 1.6984146491335874, "grad_norm": 0.062193536117496305, "learning_rate": 0.00015240790467661774, "loss": 0.8416, "step": 96740 }, { "epoch": 1.6985902140135887, "grad_norm": 0.055259148204471024, "learning_rate": 0.00015239838090031187, "loss": 0.8437, "step": 96750 }, { "epoch": 1.6987657788935902, "grad_norm": 0.04530963705258118, "learning_rate": 0.0001523888564727787, "loss": 0.859, "step": 96760 }, { "epoch": 1.6989413437735914, "grad_norm": 0.06705087334145746, "learning_rate": 0.00015237933139413881, "loss": 0.8461, "step": 96770 }, { "epoch": 1.6991169086535929, "grad_norm": 0.05474999519941859, "learning_rate": 0.00015236980566451299, "loss": 0.8493, "step": 96780 }, { "epoch": 1.6992924735335944, "grad_norm": 0.057956140036380936, "learning_rate": 0.0001523602792840219, "loss": 0.8454, "step": 96790 }, { "epoch": 1.6994680384135958, "grad_norm": 0.0495773080281708, "learning_rate": 0.0001523507522527862, "loss": 0.8452, "step": 96800 }, { "epoch": 1.699643603293597, "grad_norm": 0.046600082266349645, "learning_rate": 0.00015234122457092662, "loss": 0.8498, "step": 96810 }, { "epoch": 1.6998191681735986, "grad_norm": 0.059998449426231185, "learning_rate": 0.00015233169623856392, "loss": 0.8496, "step": 96820 }, { "epoch": 1.6999947330535998, "grad_norm": 0.057900225516074384, "learning_rate": 0.00015232216725581877, "loss": 0.8429, "step": 96830 }, { "epoch": 1.7001702979336013, "grad_norm": 0.056153029710077094, "learning_rate": 0.0001523126376228119, "loss": 0.8466, "step": 96840 }, { "epoch": 1.7003458628136028, "grad_norm": 0.06070327524053241, "learning_rate": 0.00015230310733966408, "loss": 0.8485, "step": 96850 }, { "epoch": 1.7005214276936043, "grad_norm": 0.060566344893630684, "learning_rate": 0.00015229357640649611, "loss": 0.8455, "step": 96860 }, { "epoch": 1.7006969925736057, "grad_norm": 0.04643852814790019, "learning_rate": 0.00015228404482342867, "loss": 0.8557, "step": 96870 }, { "epoch": 1.700872557453607, "grad_norm": 0.04553816657605914, "learning_rate": 0.00015227451259058251, "loss": 0.8428, "step": 96880 }, { "epoch": 1.7010481223336082, "grad_norm": 0.07981304854822649, "learning_rate": 0.00015226497970807852, "loss": 0.8532, "step": 96890 }, { "epoch": 1.7012236872136097, "grad_norm": 0.05849282962822837, "learning_rate": 0.00015225544617603734, "loss": 0.8471, "step": 96900 }, { "epoch": 1.7013992520936112, "grad_norm": 0.05426745131305374, "learning_rate": 0.00015224591199457992, "loss": 0.8484, "step": 96910 }, { "epoch": 1.7015748169736127, "grad_norm": 0.06218703066181585, "learning_rate": 0.0001522363771638269, "loss": 0.8486, "step": 96920 }, { "epoch": 1.7017503818536142, "grad_norm": 0.08791812568717294, "learning_rate": 0.0001522268416838992, "loss": 0.847, "step": 96930 }, { "epoch": 1.7019259467336154, "grad_norm": 0.06566558096283807, "learning_rate": 0.00015221730555491756, "loss": 0.8449, "step": 96940 }, { "epoch": 1.7021015116136167, "grad_norm": 0.09674042422015741, "learning_rate": 0.00015220776877700287, "loss": 0.8433, "step": 96950 }, { "epoch": 1.7022770764936181, "grad_norm": 0.04248990719558457, "learning_rate": 0.00015219823135027597, "loss": 0.842, "step": 96960 }, { "epoch": 1.7024526413736196, "grad_norm": 0.057485654677226945, "learning_rate": 0.00015218869327485762, "loss": 0.847, "step": 96970 }, { "epoch": 1.702628206253621, "grad_norm": 0.0574137770080803, "learning_rate": 0.00015217915455086872, "loss": 0.8499, "step": 96980 }, { "epoch": 1.7028037711336226, "grad_norm": 0.05542091880855439, "learning_rate": 0.0001521696151784301, "loss": 0.8493, "step": 96990 }, { "epoch": 1.7029793360136238, "grad_norm": 0.06630582203921802, "learning_rate": 0.0001521600751576627, "loss": 0.8561, "step": 97000 }, { "epoch": 1.703154900893625, "grad_norm": 0.07199614064867678, "learning_rate": 0.00015215053448868734, "loss": 0.8532, "step": 97010 }, { "epoch": 1.7033304657736266, "grad_norm": 0.05367124904852347, "learning_rate": 0.00015214099317162487, "loss": 0.8418, "step": 97020 }, { "epoch": 1.703506030653628, "grad_norm": 0.04846972956662557, "learning_rate": 0.00015213145120659624, "loss": 0.8468, "step": 97030 }, { "epoch": 1.7036815955336295, "grad_norm": 0.049354321984957074, "learning_rate": 0.0001521219085937223, "loss": 0.8399, "step": 97040 }, { "epoch": 1.703857160413631, "grad_norm": 0.058078207010873184, "learning_rate": 0.000152112365333124, "loss": 0.856, "step": 97050 }, { "epoch": 1.7040327252936323, "grad_norm": 0.07702379027825147, "learning_rate": 0.00015210282142492224, "loss": 0.8434, "step": 97060 }, { "epoch": 1.7042082901736335, "grad_norm": 0.05348775652422329, "learning_rate": 0.00015209327686923794, "loss": 0.8458, "step": 97070 }, { "epoch": 1.704383855053635, "grad_norm": 0.05755630910175806, "learning_rate": 0.00015208373166619203, "loss": 0.8537, "step": 97080 }, { "epoch": 1.7045594199336365, "grad_norm": 0.04905357836689019, "learning_rate": 0.00015207418581590542, "loss": 0.8549, "step": 97090 }, { "epoch": 1.704734984813638, "grad_norm": 0.053901674504769174, "learning_rate": 0.00015206463931849911, "loss": 0.848, "step": 97100 }, { "epoch": 1.7049105496936394, "grad_norm": 0.04922649370274321, "learning_rate": 0.00015205509217409402, "loss": 0.8471, "step": 97110 }, { "epoch": 1.7050861145736407, "grad_norm": 0.07326526152282936, "learning_rate": 0.00015204554438281117, "loss": 0.842, "step": 97120 }, { "epoch": 1.705261679453642, "grad_norm": 0.06873451698597038, "learning_rate": 0.00015203599594477144, "loss": 0.8545, "step": 97130 }, { "epoch": 1.7054372443336434, "grad_norm": 0.05090565020169309, "learning_rate": 0.0001520264468600959, "loss": 0.8424, "step": 97140 }, { "epoch": 1.7056128092136449, "grad_norm": 0.05552458696105793, "learning_rate": 0.00015201689712890547, "loss": 0.8478, "step": 97150 }, { "epoch": 1.7057883740936464, "grad_norm": 0.0640305447375787, "learning_rate": 0.0001520073467513212, "loss": 0.8489, "step": 97160 }, { "epoch": 1.7059639389736478, "grad_norm": 0.05627944546993739, "learning_rate": 0.00015199779572746405, "loss": 0.8354, "step": 97170 }, { "epoch": 1.706139503853649, "grad_norm": 0.05628907161396378, "learning_rate": 0.00015198824405745505, "loss": 0.8436, "step": 97180 }, { "epoch": 1.7063150687336506, "grad_norm": 0.07530609252913746, "learning_rate": 0.00015197869174141525, "loss": 0.8422, "step": 97190 }, { "epoch": 1.7064906336136518, "grad_norm": 0.051074991984839745, "learning_rate": 0.00015196913877946563, "loss": 0.8418, "step": 97200 }, { "epoch": 1.7066661984936533, "grad_norm": 0.04579567871685326, "learning_rate": 0.00015195958517172725, "loss": 0.8507, "step": 97210 }, { "epoch": 1.7068417633736548, "grad_norm": 0.057851434183724154, "learning_rate": 0.00015195003091832116, "loss": 0.8466, "step": 97220 }, { "epoch": 1.7070173282536563, "grad_norm": 0.0718946159506703, "learning_rate": 0.00015194047601936847, "loss": 0.8409, "step": 97230 }, { "epoch": 1.7071928931336575, "grad_norm": 0.05158292239217141, "learning_rate": 0.00015193092047499014, "loss": 0.8503, "step": 97240 }, { "epoch": 1.707368458013659, "grad_norm": 0.06176464937168185, "learning_rate": 0.0001519213642853073, "loss": 0.8543, "step": 97250 }, { "epoch": 1.7075440228936603, "grad_norm": 0.0815471584692428, "learning_rate": 0.00015191180745044097, "loss": 0.8452, "step": 97260 }, { "epoch": 1.7077195877736617, "grad_norm": 0.04516434460157234, "learning_rate": 0.00015190224997051234, "loss": 0.848, "step": 97270 }, { "epoch": 1.7078951526536632, "grad_norm": 0.05435522560418124, "learning_rate": 0.00015189269184564242, "loss": 0.8463, "step": 97280 }, { "epoch": 1.7080707175336647, "grad_norm": 0.06781501584923312, "learning_rate": 0.00015188313307595237, "loss": 0.8463, "step": 97290 }, { "epoch": 1.708246282413666, "grad_norm": 0.06896512321811728, "learning_rate": 0.00015187357366156325, "loss": 0.8419, "step": 97300 }, { "epoch": 1.7084218472936674, "grad_norm": 0.06834619766394862, "learning_rate": 0.00015186401360259616, "loss": 0.8478, "step": 97310 }, { "epoch": 1.7085974121736687, "grad_norm": 0.06264427121814779, "learning_rate": 0.00015185445289917236, "loss": 0.8438, "step": 97320 }, { "epoch": 1.7087729770536702, "grad_norm": 0.06009883897394693, "learning_rate": 0.00015184489155141282, "loss": 0.8544, "step": 97330 }, { "epoch": 1.7089485419336716, "grad_norm": 0.06423539694661339, "learning_rate": 0.0001518353295594388, "loss": 0.8535, "step": 97340 }, { "epoch": 1.709124106813673, "grad_norm": 0.07209871891309425, "learning_rate": 0.0001518257669233714, "loss": 0.8467, "step": 97350 }, { "epoch": 1.7092996716936744, "grad_norm": 0.048110879210878206, "learning_rate": 0.00015181620364333178, "loss": 0.8505, "step": 97360 }, { "epoch": 1.7094752365736758, "grad_norm": 0.06938176658558055, "learning_rate": 0.00015180663971944115, "loss": 0.8446, "step": 97370 }, { "epoch": 1.709650801453677, "grad_norm": 0.047128069856970385, "learning_rate": 0.00015179707515182062, "loss": 0.8395, "step": 97380 }, { "epoch": 1.7098263663336786, "grad_norm": 0.07592155274503691, "learning_rate": 0.00015178750994059142, "loss": 0.8404, "step": 97390 }, { "epoch": 1.71000193121368, "grad_norm": 0.07280163078498068, "learning_rate": 0.0001517779440858748, "loss": 0.8525, "step": 97400 }, { "epoch": 1.7101774960936815, "grad_norm": 0.06084006311786117, "learning_rate": 0.00015176837758779183, "loss": 0.8514, "step": 97410 }, { "epoch": 1.7103530609736828, "grad_norm": 0.06665903883822748, "learning_rate": 0.0001517588104464638, "loss": 0.8448, "step": 97420 }, { "epoch": 1.7105286258536843, "grad_norm": 0.07179751429173435, "learning_rate": 0.0001517492426620119, "loss": 0.8516, "step": 97430 }, { "epoch": 1.7107041907336855, "grad_norm": 0.06683140238114314, "learning_rate": 0.0001517396742345574, "loss": 0.8432, "step": 97440 }, { "epoch": 1.710879755613687, "grad_norm": 0.06690344525670983, "learning_rate": 0.00015173010516422145, "loss": 0.8395, "step": 97450 }, { "epoch": 1.7110553204936885, "grad_norm": 0.06449953662530392, "learning_rate": 0.0001517205354511254, "loss": 0.8419, "step": 97460 }, { "epoch": 1.71123088537369, "grad_norm": 0.06959404164797721, "learning_rate": 0.00015171096509539045, "loss": 0.8362, "step": 97470 }, { "epoch": 1.7114064502536912, "grad_norm": 0.06943553561691121, "learning_rate": 0.0001517013940971378, "loss": 0.8525, "step": 97480 }, { "epoch": 1.7115820151336927, "grad_norm": 0.06328248546525903, "learning_rate": 0.00015169182245648884, "loss": 0.8518, "step": 97490 }, { "epoch": 1.711757580013694, "grad_norm": 0.08549568593175405, "learning_rate": 0.00015168225017356471, "loss": 0.8485, "step": 97500 }, { "epoch": 1.7119331448936954, "grad_norm": 0.06960141178321681, "learning_rate": 0.0001516726772484868, "loss": 0.8475, "step": 97510 }, { "epoch": 1.712108709773697, "grad_norm": 0.08496695567732052, "learning_rate": 0.00015166310368137632, "loss": 0.8402, "step": 97520 }, { "epoch": 1.7122842746536984, "grad_norm": 0.0640160459559346, "learning_rate": 0.00015165352947235465, "loss": 0.8501, "step": 97530 }, { "epoch": 1.7124598395336996, "grad_norm": 0.06925778335289651, "learning_rate": 0.00015164395462154303, "loss": 0.8503, "step": 97540 }, { "epoch": 1.712635404413701, "grad_norm": 0.0537319065163397, "learning_rate": 0.0001516343791290628, "loss": 0.8502, "step": 97550 }, { "epoch": 1.7128109692937024, "grad_norm": 0.07899014804940356, "learning_rate": 0.00015162480299503526, "loss": 0.845, "step": 97560 }, { "epoch": 1.7129865341737038, "grad_norm": 0.06860505434857679, "learning_rate": 0.00015161522621958182, "loss": 0.8434, "step": 97570 }, { "epoch": 1.7131620990537053, "grad_norm": 0.06582941069962257, "learning_rate": 0.00015160564880282372, "loss": 0.8457, "step": 97580 }, { "epoch": 1.7133376639337068, "grad_norm": 0.1444411138095392, "learning_rate": 0.00015159607074488235, "loss": 0.8466, "step": 97590 }, { "epoch": 1.7135132288137083, "grad_norm": 0.05393305766164613, "learning_rate": 0.00015158649204587908, "loss": 0.8398, "step": 97600 }, { "epoch": 1.7136887936937095, "grad_norm": 0.05914502897491321, "learning_rate": 0.00015157691270593527, "loss": 0.852, "step": 97610 }, { "epoch": 1.7138643585737108, "grad_norm": 0.0610326070649969, "learning_rate": 0.00015156733272517225, "loss": 0.8458, "step": 97620 }, { "epoch": 1.7140399234537123, "grad_norm": 0.07203019092968875, "learning_rate": 0.00015155775210371145, "loss": 0.8499, "step": 97630 }, { "epoch": 1.7142154883337137, "grad_norm": 0.07414454684997046, "learning_rate": 0.00015154817084167427, "loss": 0.8475, "step": 97640 }, { "epoch": 1.7143910532137152, "grad_norm": 0.06866403970633297, "learning_rate": 0.00015153858893918206, "loss": 0.8573, "step": 97650 }, { "epoch": 1.7145666180937167, "grad_norm": 0.08604900166023467, "learning_rate": 0.00015152900639635623, "loss": 0.8424, "step": 97660 }, { "epoch": 1.714742182973718, "grad_norm": 0.05617215135391306, "learning_rate": 0.0001515194232133182, "loss": 0.8436, "step": 97670 }, { "epoch": 1.7149177478537192, "grad_norm": 0.05800239081460891, "learning_rate": 0.0001515098393901894, "loss": 0.8468, "step": 97680 }, { "epoch": 1.7150933127337207, "grad_norm": 0.08011335604287692, "learning_rate": 0.0001515002549270913, "loss": 0.8483, "step": 97690 }, { "epoch": 1.7152688776137222, "grad_norm": 0.11123972771934064, "learning_rate": 0.00015149066982414526, "loss": 0.8473, "step": 97700 }, { "epoch": 1.7154444424937236, "grad_norm": 0.0567471883454452, "learning_rate": 0.00015148108408147274, "loss": 0.8416, "step": 97710 }, { "epoch": 1.715620007373725, "grad_norm": 0.06458314086989997, "learning_rate": 0.0001514714976991952, "loss": 0.8544, "step": 97720 }, { "epoch": 1.7157955722537264, "grad_norm": 0.05754184953355685, "learning_rate": 0.00015146191067743413, "loss": 0.8568, "step": 97730 }, { "epoch": 1.7159711371337276, "grad_norm": 0.05113665715830573, "learning_rate": 0.000151452323016311, "loss": 0.8511, "step": 97740 }, { "epoch": 1.716146702013729, "grad_norm": 0.06205559743317499, "learning_rate": 0.00015144273471594724, "loss": 0.849, "step": 97750 }, { "epoch": 1.7163222668937306, "grad_norm": 0.05460825503843154, "learning_rate": 0.00015143314577646438, "loss": 0.8448, "step": 97760 }, { "epoch": 1.716497831773732, "grad_norm": 0.06598672255920433, "learning_rate": 0.00015142355619798389, "loss": 0.8501, "step": 97770 }, { "epoch": 1.7166733966537335, "grad_norm": 0.05770046131034454, "learning_rate": 0.0001514139659806273, "loss": 0.853, "step": 97780 }, { "epoch": 1.7168489615337348, "grad_norm": 0.05957000131048351, "learning_rate": 0.0001514043751245161, "loss": 0.8451, "step": 97790 }, { "epoch": 1.717024526413736, "grad_norm": 0.051348686645230374, "learning_rate": 0.00015139478362977178, "loss": 0.8508, "step": 97800 }, { "epoch": 1.7172000912937375, "grad_norm": 0.06158751378237986, "learning_rate": 0.0001513851914965159, "loss": 0.8414, "step": 97810 }, { "epoch": 1.717375656173739, "grad_norm": 0.04212268388479152, "learning_rate": 0.00015137559872486998, "loss": 0.8481, "step": 97820 }, { "epoch": 1.7175512210537405, "grad_norm": 0.06371793908169576, "learning_rate": 0.00015136600531495556, "loss": 0.85, "step": 97830 }, { "epoch": 1.717726785933742, "grad_norm": 0.0577834639275934, "learning_rate": 0.00015135641126689424, "loss": 0.8527, "step": 97840 }, { "epoch": 1.7179023508137432, "grad_norm": 0.06913039443535143, "learning_rate": 0.0001513468165808075, "loss": 0.8488, "step": 97850 }, { "epoch": 1.7180779156937445, "grad_norm": 0.05206835825371254, "learning_rate": 0.000151337221256817, "loss": 0.8547, "step": 97860 }, { "epoch": 1.718253480573746, "grad_norm": 0.049430107612323464, "learning_rate": 0.00015132762529504421, "loss": 0.8499, "step": 97870 }, { "epoch": 1.7184290454537474, "grad_norm": 0.04945564506686379, "learning_rate": 0.00015131802869561078, "loss": 0.8491, "step": 97880 }, { "epoch": 1.718604610333749, "grad_norm": 0.043315430906840556, "learning_rate": 0.00015130843145863828, "loss": 0.845, "step": 97890 }, { "epoch": 1.7187801752137504, "grad_norm": 0.0480021777963715, "learning_rate": 0.00015129883358424827, "loss": 0.8478, "step": 97900 }, { "epoch": 1.7189557400937516, "grad_norm": 0.044041428477260926, "learning_rate": 0.0001512892350725625, "loss": 0.8372, "step": 97910 }, { "epoch": 1.719131304973753, "grad_norm": 0.05046376732321836, "learning_rate": 0.0001512796359237024, "loss": 0.8586, "step": 97920 }, { "epoch": 1.7193068698537544, "grad_norm": 0.06549957805479635, "learning_rate": 0.0001512700361377897, "loss": 0.8488, "step": 97930 }, { "epoch": 1.7194824347337558, "grad_norm": 0.05671870533216356, "learning_rate": 0.000151260435714946, "loss": 0.8441, "step": 97940 }, { "epoch": 1.7196579996137573, "grad_norm": 0.052509425481869605, "learning_rate": 0.00015125083465529298, "loss": 0.8506, "step": 97950 }, { "epoch": 1.7198335644937588, "grad_norm": 0.07854184336382379, "learning_rate": 0.00015124123295895222, "loss": 0.8403, "step": 97960 }, { "epoch": 1.72000912937376, "grad_norm": 0.061280758725232795, "learning_rate": 0.0001512316306260454, "loss": 0.8521, "step": 97970 }, { "epoch": 1.7201846942537615, "grad_norm": 0.06095638597521177, "learning_rate": 0.00015122202765669422, "loss": 0.8505, "step": 97980 }, { "epoch": 1.7203602591337628, "grad_norm": 0.059636760109282975, "learning_rate": 0.0001512124240510203, "loss": 0.8425, "step": 97990 }, { "epoch": 1.7205358240137643, "grad_norm": 0.05735675362163551, "learning_rate": 0.00015120281980914537, "loss": 0.8486, "step": 98000 }, { "epoch": 1.7207113888937657, "grad_norm": 0.053229757208893155, "learning_rate": 0.00015119321493119108, "loss": 0.8515, "step": 98010 }, { "epoch": 1.7208869537737672, "grad_norm": 0.09178269118652384, "learning_rate": 0.00015118360941727917, "loss": 0.8456, "step": 98020 }, { "epoch": 1.7210625186537685, "grad_norm": 0.07440545733216995, "learning_rate": 0.00015117400326753125, "loss": 0.8499, "step": 98030 }, { "epoch": 1.72123808353377, "grad_norm": 0.05858932919443485, "learning_rate": 0.00015116439648206915, "loss": 0.8546, "step": 98040 }, { "epoch": 1.7214136484137712, "grad_norm": 0.06337174441173236, "learning_rate": 0.00015115478906101447, "loss": 0.8503, "step": 98050 }, { "epoch": 1.7215892132937727, "grad_norm": 0.05964859620654927, "learning_rate": 0.00015114518100448906, "loss": 0.8504, "step": 98060 }, { "epoch": 1.7217647781737742, "grad_norm": 0.06078035909319704, "learning_rate": 0.00015113557231261456, "loss": 0.8437, "step": 98070 }, { "epoch": 1.7219403430537756, "grad_norm": 0.0657497772935001, "learning_rate": 0.00015112596298551278, "loss": 0.8547, "step": 98080 }, { "epoch": 1.722115907933777, "grad_norm": 0.050095497660575534, "learning_rate": 0.0001511163530233054, "loss": 0.8524, "step": 98090 }, { "epoch": 1.7222914728137784, "grad_norm": 0.05611491542911289, "learning_rate": 0.00015110674242611424, "loss": 0.8516, "step": 98100 }, { "epoch": 1.7224670376937796, "grad_norm": 0.08214345862642192, "learning_rate": 0.0001510971311940611, "loss": 0.8517, "step": 98110 }, { "epoch": 1.722642602573781, "grad_norm": 0.054228166412268027, "learning_rate": 0.00015108751932726763, "loss": 0.8475, "step": 98120 }, { "epoch": 1.7228181674537826, "grad_norm": 0.05903965700034488, "learning_rate": 0.00015107790682585573, "loss": 0.8532, "step": 98130 }, { "epoch": 1.722993732333784, "grad_norm": 0.055533487228533976, "learning_rate": 0.00015106829368994714, "loss": 0.835, "step": 98140 }, { "epoch": 1.7231692972137853, "grad_norm": 0.053564008361141086, "learning_rate": 0.0001510586799196637, "loss": 0.8505, "step": 98150 }, { "epoch": 1.7233448620937868, "grad_norm": 0.06708955020485145, "learning_rate": 0.00015104906551512717, "loss": 0.8518, "step": 98160 }, { "epoch": 1.723520426973788, "grad_norm": 0.050701243485894915, "learning_rate": 0.0001510394504764594, "loss": 0.8442, "step": 98170 }, { "epoch": 1.7236959918537895, "grad_norm": 0.05201715822176467, "learning_rate": 0.0001510298348037822, "loss": 0.8466, "step": 98180 }, { "epoch": 1.723871556733791, "grad_norm": 0.07532922201684264, "learning_rate": 0.0001510202184972174, "loss": 0.8457, "step": 98190 }, { "epoch": 1.7240471216137925, "grad_norm": 0.045883576467737164, "learning_rate": 0.00015101060155688686, "loss": 0.845, "step": 98200 }, { "epoch": 1.7242226864937937, "grad_norm": 0.058240260755423275, "learning_rate": 0.0001510009839829124, "loss": 0.8438, "step": 98210 }, { "epoch": 1.7243982513737952, "grad_norm": 0.058717622640629366, "learning_rate": 0.00015099136577541585, "loss": 0.8555, "step": 98220 }, { "epoch": 1.7245738162537965, "grad_norm": 0.061428623818586534, "learning_rate": 0.00015098174693451918, "loss": 0.8469, "step": 98230 }, { "epoch": 1.724749381133798, "grad_norm": 0.07440543065055699, "learning_rate": 0.00015097212746034415, "loss": 0.8519, "step": 98240 }, { "epoch": 1.7249249460137994, "grad_norm": 0.06627989800349525, "learning_rate": 0.00015096250735301275, "loss": 0.8525, "step": 98250 }, { "epoch": 1.725100510893801, "grad_norm": 0.05761612858917361, "learning_rate": 0.00015095288661264673, "loss": 0.854, "step": 98260 }, { "epoch": 1.7252760757738022, "grad_norm": 0.06372359396022848, "learning_rate": 0.00015094326523936813, "loss": 0.851, "step": 98270 }, { "epoch": 1.7254516406538036, "grad_norm": 0.0594459389638476, "learning_rate": 0.00015093364323329875, "loss": 0.853, "step": 98280 }, { "epoch": 1.7256272055338049, "grad_norm": 0.061158502792678725, "learning_rate": 0.00015092402059456052, "loss": 0.8451, "step": 98290 }, { "epoch": 1.7258027704138064, "grad_norm": 0.04968169337309075, "learning_rate": 0.00015091439732327542, "loss": 0.8545, "step": 98300 }, { "epoch": 1.7259783352938078, "grad_norm": 0.07158712895451569, "learning_rate": 0.00015090477341956533, "loss": 0.8473, "step": 98310 }, { "epoch": 1.7261539001738093, "grad_norm": 0.050813051433544025, "learning_rate": 0.00015089514888355217, "loss": 0.8498, "step": 98320 }, { "epoch": 1.7263294650538108, "grad_norm": 0.04713869336449074, "learning_rate": 0.00015088552371535796, "loss": 0.852, "step": 98330 }, { "epoch": 1.726505029933812, "grad_norm": 0.05793834039754169, "learning_rate": 0.0001508758979151046, "loss": 0.8419, "step": 98340 }, { "epoch": 1.7266805948138133, "grad_norm": 0.06659642410706508, "learning_rate": 0.00015086627148291402, "loss": 0.8505, "step": 98350 }, { "epoch": 1.7268561596938148, "grad_norm": 0.05989054917499796, "learning_rate": 0.00015085664441890823, "loss": 0.8506, "step": 98360 }, { "epoch": 1.7270317245738163, "grad_norm": 0.061260807817819375, "learning_rate": 0.00015084701672320926, "loss": 0.8447, "step": 98370 }, { "epoch": 1.7272072894538177, "grad_norm": 0.08723931419821015, "learning_rate": 0.00015083738839593894, "loss": 0.8495, "step": 98380 }, { "epoch": 1.7273828543338192, "grad_norm": 0.0706676973541325, "learning_rate": 0.0001508277594372194, "loss": 0.8417, "step": 98390 }, { "epoch": 1.7275584192138205, "grad_norm": 0.05362603381462821, "learning_rate": 0.00015081812984717258, "loss": 0.8426, "step": 98400 }, { "epoch": 1.7277339840938217, "grad_norm": 0.05851192581819552, "learning_rate": 0.00015080849962592055, "loss": 0.8499, "step": 98410 }, { "epoch": 1.7279095489738232, "grad_norm": 0.06177069901753289, "learning_rate": 0.00015079886877358527, "loss": 0.846, "step": 98420 }, { "epoch": 1.7280851138538247, "grad_norm": 0.05068705887466879, "learning_rate": 0.00015078923729028876, "loss": 0.8442, "step": 98430 }, { "epoch": 1.7282606787338262, "grad_norm": 0.05796676798420028, "learning_rate": 0.0001507796051761531, "loss": 0.8458, "step": 98440 }, { "epoch": 1.7284362436138276, "grad_norm": 0.052477489026851315, "learning_rate": 0.00015076997243130026, "loss": 0.8438, "step": 98450 }, { "epoch": 1.728611808493829, "grad_norm": 0.054469403100863144, "learning_rate": 0.00015076033905585238, "loss": 0.8507, "step": 98460 }, { "epoch": 1.7287873733738301, "grad_norm": 0.053563336313333966, "learning_rate": 0.00015075070504993142, "loss": 0.8488, "step": 98470 }, { "epoch": 1.7289629382538316, "grad_norm": 0.04981673172037838, "learning_rate": 0.0001507410704136595, "loss": 0.8472, "step": 98480 }, { "epoch": 1.729138503133833, "grad_norm": 0.050422251124851596, "learning_rate": 0.00015073143514715873, "loss": 0.8433, "step": 98490 }, { "epoch": 1.7293140680138346, "grad_norm": 0.07430827133895274, "learning_rate": 0.0001507217992505511, "loss": 0.8435, "step": 98500 }, { "epoch": 1.729489632893836, "grad_norm": 0.05763400623673126, "learning_rate": 0.00015071216272395874, "loss": 0.8493, "step": 98510 }, { "epoch": 1.7296651977738373, "grad_norm": 0.06162095558624064, "learning_rate": 0.00015070252556750378, "loss": 0.8535, "step": 98520 }, { "epoch": 1.7298407626538386, "grad_norm": 0.06281349190707212, "learning_rate": 0.0001506928877813083, "loss": 0.8513, "step": 98530 }, { "epoch": 1.73001632753384, "grad_norm": 0.07216847589303294, "learning_rate": 0.0001506832493654944, "loss": 0.8447, "step": 98540 }, { "epoch": 1.7301918924138415, "grad_norm": 0.04606963993901585, "learning_rate": 0.0001506736103201842, "loss": 0.8515, "step": 98550 }, { "epoch": 1.730367457293843, "grad_norm": 0.06399147001884023, "learning_rate": 0.00015066397064549984, "loss": 0.8524, "step": 98560 }, { "epoch": 1.7305430221738445, "grad_norm": 0.08222763861328515, "learning_rate": 0.00015065433034156344, "loss": 0.8443, "step": 98570 }, { "epoch": 1.7307185870538457, "grad_norm": 0.09014684113288732, "learning_rate": 0.00015064468940849716, "loss": 0.8531, "step": 98580 }, { "epoch": 1.730894151933847, "grad_norm": 0.048553338517865266, "learning_rate": 0.0001506350478464232, "loss": 0.8441, "step": 98590 }, { "epoch": 1.7310697168138485, "grad_norm": 0.05675486093690112, "learning_rate": 0.0001506254056554636, "loss": 0.8493, "step": 98600 }, { "epoch": 1.73124528169385, "grad_norm": 0.05750031826864967, "learning_rate": 0.00015061576283574067, "loss": 0.8456, "step": 98610 }, { "epoch": 1.7314208465738514, "grad_norm": 0.072611065959397, "learning_rate": 0.00015060611938737646, "loss": 0.8451, "step": 98620 }, { "epoch": 1.731596411453853, "grad_norm": 0.07019304851797256, "learning_rate": 0.00015059647531049324, "loss": 0.8564, "step": 98630 }, { "epoch": 1.7317719763338542, "grad_norm": 0.07042958565302529, "learning_rate": 0.00015058683060521316, "loss": 0.8443, "step": 98640 }, { "epoch": 1.7319475412138556, "grad_norm": 0.07228893696741615, "learning_rate": 0.00015057718527165844, "loss": 0.8449, "step": 98650 }, { "epoch": 1.732123106093857, "grad_norm": 0.05753958839569914, "learning_rate": 0.00015056753930995127, "loss": 0.8413, "step": 98660 }, { "epoch": 1.7322986709738584, "grad_norm": 0.07397959668206447, "learning_rate": 0.0001505578927202139, "loss": 0.8426, "step": 98670 }, { "epoch": 1.7324742358538598, "grad_norm": 0.05633411106005507, "learning_rate": 0.00015054824550256852, "loss": 0.8485, "step": 98680 }, { "epoch": 1.7326498007338613, "grad_norm": 0.047568517027621826, "learning_rate": 0.00015053859765713735, "loss": 0.8506, "step": 98690 }, { "epoch": 1.7328253656138626, "grad_norm": 0.05977623475778332, "learning_rate": 0.00015052894918404268, "loss": 0.8494, "step": 98700 }, { "epoch": 1.733000930493864, "grad_norm": 0.056797474151436905, "learning_rate": 0.00015051930008340675, "loss": 0.8448, "step": 98710 }, { "epoch": 1.7331764953738653, "grad_norm": 0.05452211820933728, "learning_rate": 0.00015050965035535177, "loss": 0.8509, "step": 98720 }, { "epoch": 1.7333520602538668, "grad_norm": 0.06281993723631585, "learning_rate": 0.0001505, "loss": 0.8554, "step": 98730 }, { "epoch": 1.7335276251338683, "grad_norm": 0.04507174734155711, "learning_rate": 0.00015049034901747378, "loss": 0.8476, "step": 98740 }, { "epoch": 1.7337031900138697, "grad_norm": 0.05155036661151256, "learning_rate": 0.00015048069740789533, "loss": 0.8434, "step": 98750 }, { "epoch": 1.733878754893871, "grad_norm": 0.06014055394113231, "learning_rate": 0.000150471045171387, "loss": 0.8398, "step": 98760 }, { "epoch": 1.7340543197738725, "grad_norm": 0.04978332956505653, "learning_rate": 0.00015046139230807105, "loss": 0.8574, "step": 98770 }, { "epoch": 1.7342298846538737, "grad_norm": 0.07288505217661696, "learning_rate": 0.00015045173881806975, "loss": 0.8513, "step": 98780 }, { "epoch": 1.7344054495338752, "grad_norm": 0.06502610395262082, "learning_rate": 0.00015044208470150545, "loss": 0.8498, "step": 98790 }, { "epoch": 1.7345810144138767, "grad_norm": 0.0625990790053323, "learning_rate": 0.00015043242995850046, "loss": 0.8529, "step": 98800 }, { "epoch": 1.7347565792938782, "grad_norm": 0.061329946149352776, "learning_rate": 0.0001504227745891771, "loss": 0.8371, "step": 98810 }, { "epoch": 1.7349321441738794, "grad_norm": 0.04463362014794127, "learning_rate": 0.00015041311859365775, "loss": 0.8477, "step": 98820 }, { "epoch": 1.735107709053881, "grad_norm": 0.05135968858192828, "learning_rate": 0.0001504034619720647, "loss": 0.8511, "step": 98830 }, { "epoch": 1.7352832739338822, "grad_norm": 0.05502900078444362, "learning_rate": 0.0001503938047245203, "loss": 0.8435, "step": 98840 }, { "epoch": 1.7354588388138836, "grad_norm": 0.05814079010173844, "learning_rate": 0.00015038414685114695, "loss": 0.8424, "step": 98850 }, { "epoch": 1.735634403693885, "grad_norm": 0.05917848986174673, "learning_rate": 0.00015037448835206695, "loss": 0.8396, "step": 98860 }, { "epoch": 1.7358099685738866, "grad_norm": 0.06328963192123692, "learning_rate": 0.00015036482922740275, "loss": 0.8429, "step": 98870 }, { "epoch": 1.7359855334538878, "grad_norm": 0.08323631268322775, "learning_rate": 0.00015035516947727674, "loss": 0.8471, "step": 98880 }, { "epoch": 1.7361610983338893, "grad_norm": 0.06468532609743846, "learning_rate": 0.00015034550910181122, "loss": 0.8515, "step": 98890 }, { "epoch": 1.7363366632138906, "grad_norm": 0.07374144043767022, "learning_rate": 0.00015033584810112866, "loss": 0.8447, "step": 98900 }, { "epoch": 1.736512228093892, "grad_norm": 0.0603905265021126, "learning_rate": 0.00015032618647535146, "loss": 0.8438, "step": 98910 }, { "epoch": 1.7366877929738935, "grad_norm": 0.07926063484322465, "learning_rate": 0.00015031652422460202, "loss": 0.8506, "step": 98920 }, { "epoch": 1.736863357853895, "grad_norm": 0.038898383927446274, "learning_rate": 0.00015030686134900276, "loss": 0.854, "step": 98930 }, { "epoch": 1.7370389227338963, "grad_norm": 0.06628666897035601, "learning_rate": 0.0001502971978486761, "loss": 0.8523, "step": 98940 }, { "epoch": 1.7372144876138977, "grad_norm": 0.05672838658921226, "learning_rate": 0.00015028753372374457, "loss": 0.8428, "step": 98950 }, { "epoch": 1.737390052493899, "grad_norm": 0.04795006147611145, "learning_rate": 0.00015027786897433044, "loss": 0.8468, "step": 98960 }, { "epoch": 1.7375656173739005, "grad_norm": 0.07213981366076495, "learning_rate": 0.00015026820360055634, "loss": 0.8563, "step": 98970 }, { "epoch": 1.737741182253902, "grad_norm": 0.07877007512515664, "learning_rate": 0.0001502585376025446, "loss": 0.8489, "step": 98980 }, { "epoch": 1.7379167471339034, "grad_norm": 0.06406492532983617, "learning_rate": 0.0001502488709804178, "loss": 0.8403, "step": 98990 }, { "epoch": 1.7380923120139047, "grad_norm": 0.06535492084124218, "learning_rate": 0.00015023920373429835, "loss": 0.8434, "step": 99000 }, { "epoch": 1.7382678768939062, "grad_norm": 0.09204183390962276, "learning_rate": 0.00015022953586430875, "loss": 0.8455, "step": 99010 }, { "epoch": 1.7384434417739074, "grad_norm": 0.05054325648062031, "learning_rate": 0.0001502198673705715, "loss": 0.8369, "step": 99020 }, { "epoch": 1.738619006653909, "grad_norm": 0.06486350576869376, "learning_rate": 0.00015021019825320908, "loss": 0.8526, "step": 99030 }, { "epoch": 1.7387945715339104, "grad_norm": 0.054398448771459336, "learning_rate": 0.00015020052851234404, "loss": 0.8488, "step": 99040 }, { "epoch": 1.7389701364139118, "grad_norm": 0.058786150484368624, "learning_rate": 0.00015019085814809888, "loss": 0.8472, "step": 99050 }, { "epoch": 1.7391457012939133, "grad_norm": 0.04908155724098363, "learning_rate": 0.0001501811871605961, "loss": 0.8406, "step": 99060 }, { "epoch": 1.7393212661739146, "grad_norm": 0.06678174491482253, "learning_rate": 0.00015017151554995824, "loss": 0.8554, "step": 99070 }, { "epoch": 1.7394968310539158, "grad_norm": 0.05986068730475089, "learning_rate": 0.00015016184331630787, "loss": 0.8431, "step": 99080 }, { "epoch": 1.7396723959339173, "grad_norm": 0.06281691313699242, "learning_rate": 0.0001501521704597675, "loss": 0.8417, "step": 99090 }, { "epoch": 1.7398479608139188, "grad_norm": 0.04925096516173562, "learning_rate": 0.00015014249698045977, "loss": 0.8496, "step": 99100 }, { "epoch": 1.7400235256939203, "grad_norm": 0.059614685169050746, "learning_rate": 0.00015013282287850715, "loss": 0.8483, "step": 99110 }, { "epoch": 1.7401990905739217, "grad_norm": 0.061657876772163674, "learning_rate": 0.00015012314815403225, "loss": 0.85, "step": 99120 }, { "epoch": 1.740374655453923, "grad_norm": 0.051076514945252026, "learning_rate": 0.0001501134728071576, "loss": 0.8454, "step": 99130 }, { "epoch": 1.7405502203339243, "grad_norm": 0.057935425478135585, "learning_rate": 0.00015010379683800593, "loss": 0.841, "step": 99140 }, { "epoch": 1.7407257852139257, "grad_norm": 0.04654527631280155, "learning_rate": 0.00015009412024669967, "loss": 0.8487, "step": 99150 }, { "epoch": 1.7409013500939272, "grad_norm": 0.06774620511629431, "learning_rate": 0.00015008444303336152, "loss": 0.8448, "step": 99160 }, { "epoch": 1.7410769149739287, "grad_norm": 0.05498508371825022, "learning_rate": 0.0001500747651981141, "loss": 0.8514, "step": 99170 }, { "epoch": 1.7412524798539302, "grad_norm": 0.06108917042964049, "learning_rate": 0.00015006508674107996, "loss": 0.8518, "step": 99180 }, { "epoch": 1.7414280447339314, "grad_norm": 0.0937239931002461, "learning_rate": 0.00015005540766238178, "loss": 0.8472, "step": 99190 }, { "epoch": 1.7416036096139327, "grad_norm": 0.06217339406245173, "learning_rate": 0.0001500457279621422, "loss": 0.85, "step": 99200 }, { "epoch": 1.7417791744939342, "grad_norm": 0.059329388729044676, "learning_rate": 0.0001500360476404838, "loss": 0.8444, "step": 99210 }, { "epoch": 1.7419547393739356, "grad_norm": 0.07505488799979677, "learning_rate": 0.00015002636669752932, "loss": 0.8524, "step": 99220 }, { "epoch": 1.7421303042539371, "grad_norm": 0.04597591716958494, "learning_rate": 0.00015001668513340137, "loss": 0.8435, "step": 99230 }, { "epoch": 1.7423058691339386, "grad_norm": 0.06361927376655267, "learning_rate": 0.00015000700294822263, "loss": 0.8521, "step": 99240 }, { "epoch": 1.7424814340139398, "grad_norm": 0.05311407550728778, "learning_rate": 0.00014999732014211575, "loss": 0.8438, "step": 99250 }, { "epoch": 1.742656998893941, "grad_norm": 0.060524813365248606, "learning_rate": 0.00014998763671520346, "loss": 0.8535, "step": 99260 }, { "epoch": 1.7428325637739426, "grad_norm": 0.05809119059650826, "learning_rate": 0.00014997795266760843, "loss": 0.855, "step": 99270 }, { "epoch": 1.743008128653944, "grad_norm": 0.07110139086799344, "learning_rate": 0.0001499682679994533, "loss": 0.8586, "step": 99280 }, { "epoch": 1.7431836935339455, "grad_norm": 0.04914840603573747, "learning_rate": 0.0001499585827108609, "loss": 0.8486, "step": 99290 }, { "epoch": 1.743359258413947, "grad_norm": 0.05286238862342092, "learning_rate": 0.00014994889680195383, "loss": 0.853, "step": 99300 }, { "epoch": 1.7435348232939483, "grad_norm": 0.05285327481353944, "learning_rate": 0.00014993921027285488, "loss": 0.8546, "step": 99310 }, { "epoch": 1.7437103881739495, "grad_norm": 0.07066219005255156, "learning_rate": 0.0001499295231236867, "loss": 0.8533, "step": 99320 }, { "epoch": 1.743885953053951, "grad_norm": 0.05910628855162143, "learning_rate": 0.00014991983535457218, "loss": 0.8593, "step": 99330 }, { "epoch": 1.7440615179339525, "grad_norm": 0.07826693959285057, "learning_rate": 0.00014991014696563387, "loss": 0.8466, "step": 99340 }, { "epoch": 1.744237082813954, "grad_norm": 0.05537173672246361, "learning_rate": 0.00014990045795699468, "loss": 0.8563, "step": 99350 }, { "epoch": 1.7444126476939554, "grad_norm": 0.06409578432790305, "learning_rate": 0.0001498907683287773, "loss": 0.8419, "step": 99360 }, { "epoch": 1.7445882125739567, "grad_norm": 0.04680093012280835, "learning_rate": 0.00014988107808110453, "loss": 0.8439, "step": 99370 }, { "epoch": 1.7447637774539582, "grad_norm": 0.08144459766388984, "learning_rate": 0.00014987138721409915, "loss": 0.8471, "step": 99380 }, { "epoch": 1.7449393423339594, "grad_norm": 0.06267057147946939, "learning_rate": 0.00014986169572788388, "loss": 0.8512, "step": 99390 }, { "epoch": 1.745114907213961, "grad_norm": 0.06272340917577833, "learning_rate": 0.00014985200362258158, "loss": 0.8456, "step": 99400 }, { "epoch": 1.7452904720939624, "grad_norm": 0.06566217899658852, "learning_rate": 0.00014984231089831506, "loss": 0.85, "step": 99410 }, { "epoch": 1.7454660369739639, "grad_norm": 0.08002125502648781, "learning_rate": 0.00014983261755520706, "loss": 0.8422, "step": 99420 }, { "epoch": 1.745641601853965, "grad_norm": 0.04149086205743743, "learning_rate": 0.00014982292359338046, "loss": 0.8424, "step": 99430 }, { "epoch": 1.7458171667339666, "grad_norm": 0.05916991697900539, "learning_rate": 0.00014981322901295806, "loss": 0.8511, "step": 99440 }, { "epoch": 1.7459927316139678, "grad_norm": 0.07000303749545639, "learning_rate": 0.0001498035338140627, "loss": 0.8537, "step": 99450 }, { "epoch": 1.7461682964939693, "grad_norm": 0.058808690171777624, "learning_rate": 0.00014979383799681718, "loss": 0.8434, "step": 99460 }, { "epoch": 1.7463438613739708, "grad_norm": 0.06498735864410474, "learning_rate": 0.00014978414156134438, "loss": 0.8442, "step": 99470 }, { "epoch": 1.7465194262539723, "grad_norm": 0.05126355991272636, "learning_rate": 0.00014977444450776724, "loss": 0.8505, "step": 99480 }, { "epoch": 1.7466949911339735, "grad_norm": 0.04863822052861131, "learning_rate": 0.00014976474683620845, "loss": 0.8494, "step": 99490 }, { "epoch": 1.746870556013975, "grad_norm": 0.054299575217619765, "learning_rate": 0.00014975504854679102, "loss": 0.8438, "step": 99500 }, { "epoch": 1.7470461208939763, "grad_norm": 0.04807307917258277, "learning_rate": 0.00014974534963963776, "loss": 0.8481, "step": 99510 }, { "epoch": 1.7472216857739777, "grad_norm": 0.052952041707364894, "learning_rate": 0.00014973565011487157, "loss": 0.8497, "step": 99520 }, { "epoch": 1.7473972506539792, "grad_norm": 0.07447129507475768, "learning_rate": 0.00014972594997261535, "loss": 0.8493, "step": 99530 }, { "epoch": 1.7475728155339807, "grad_norm": 0.05650589754176557, "learning_rate": 0.00014971624921299203, "loss": 0.8472, "step": 99540 }, { "epoch": 1.747748380413982, "grad_norm": 0.053034675398742626, "learning_rate": 0.0001497065478361245, "loss": 0.843, "step": 99550 }, { "epoch": 1.7479239452939834, "grad_norm": 0.05334475400590366, "learning_rate": 0.00014969684584213567, "loss": 0.8466, "step": 99560 }, { "epoch": 1.7480995101739847, "grad_norm": 0.0493346672869127, "learning_rate": 0.00014968714323114842, "loss": 0.8532, "step": 99570 }, { "epoch": 1.7482750750539862, "grad_norm": 0.04911919156632359, "learning_rate": 0.00014967744000328578, "loss": 0.8514, "step": 99580 }, { "epoch": 1.7484506399339876, "grad_norm": 0.04932001949682425, "learning_rate": 0.00014966773615867065, "loss": 0.8463, "step": 99590 }, { "epoch": 1.7486262048139891, "grad_norm": 0.05365447130394192, "learning_rate": 0.00014965803169742598, "loss": 0.8451, "step": 99600 }, { "epoch": 1.7488017696939904, "grad_norm": 0.0567565985736806, "learning_rate": 0.00014964832661967472, "loss": 0.859, "step": 99610 }, { "epoch": 1.7489773345739918, "grad_norm": 0.04531954882204411, "learning_rate": 0.0001496386209255398, "loss": 0.8452, "step": 99620 }, { "epoch": 1.749152899453993, "grad_norm": 0.06582661547721243, "learning_rate": 0.00014962891461514427, "loss": 0.8489, "step": 99630 }, { "epoch": 1.7493284643339946, "grad_norm": 0.059168221673006845, "learning_rate": 0.00014961920768861105, "loss": 0.8388, "step": 99640 }, { "epoch": 1.749504029213996, "grad_norm": 0.07057056516221366, "learning_rate": 0.0001496095001460632, "loss": 0.8406, "step": 99650 }, { "epoch": 1.7496795940939975, "grad_norm": 0.05433722455003561, "learning_rate": 0.00014959979198762358, "loss": 0.8487, "step": 99660 }, { "epoch": 1.7498551589739988, "grad_norm": 0.0710508127963252, "learning_rate": 0.00014959008321341533, "loss": 0.8464, "step": 99670 }, { "epoch": 1.7500307238540003, "grad_norm": 0.047301489699058334, "learning_rate": 0.00014958037382356145, "loss": 0.8452, "step": 99680 }, { "epoch": 1.7502062887340015, "grad_norm": 0.04475055213831268, "learning_rate": 0.00014957066381818488, "loss": 0.8474, "step": 99690 }, { "epoch": 1.750381853614003, "grad_norm": 0.05463109335700984, "learning_rate": 0.00014956095319740867, "loss": 0.8452, "step": 99700 }, { "epoch": 1.7505574184940045, "grad_norm": 0.06878409101547579, "learning_rate": 0.00014955124196135592, "loss": 0.8544, "step": 99710 }, { "epoch": 1.750732983374006, "grad_norm": 0.05012801216514237, "learning_rate": 0.00014954153011014962, "loss": 0.8441, "step": 99720 }, { "epoch": 1.7509085482540072, "grad_norm": 0.08219758977439091, "learning_rate": 0.00014953181764391286, "loss": 0.8466, "step": 99730 }, { "epoch": 1.7510841131340087, "grad_norm": 0.07092488762595216, "learning_rate": 0.0001495221045627686, "loss": 0.8511, "step": 99740 }, { "epoch": 1.75125967801401, "grad_norm": 0.05914972023226812, "learning_rate": 0.00014951239086684003, "loss": 0.8459, "step": 99750 }, { "epoch": 1.7514352428940114, "grad_norm": 0.07313104720578638, "learning_rate": 0.00014950267655625013, "loss": 0.8407, "step": 99760 }, { "epoch": 1.751610807774013, "grad_norm": 0.07709858248861258, "learning_rate": 0.00014949296163112205, "loss": 0.84, "step": 99770 }, { "epoch": 1.7517863726540144, "grad_norm": 0.08932769886023822, "learning_rate": 0.00014948324609157887, "loss": 0.8517, "step": 99780 }, { "epoch": 1.7519619375340159, "grad_norm": 0.05311146323252581, "learning_rate": 0.00014947352993774363, "loss": 0.8446, "step": 99790 }, { "epoch": 1.7521375024140171, "grad_norm": 0.05878548261690554, "learning_rate": 0.00014946381316973949, "loss": 0.8464, "step": 99800 }, { "epoch": 1.7523130672940184, "grad_norm": 0.06003053251791241, "learning_rate": 0.00014945409578768953, "loss": 0.841, "step": 99810 }, { "epoch": 1.7524886321740198, "grad_norm": 0.04652962401904225, "learning_rate": 0.00014944437779171695, "loss": 0.8461, "step": 99820 }, { "epoch": 1.7526641970540213, "grad_norm": 0.05141202943263027, "learning_rate": 0.00014943465918194478, "loss": 0.8495, "step": 99830 }, { "epoch": 1.7528397619340228, "grad_norm": 0.046499952022495815, "learning_rate": 0.00014942493995849618, "loss": 0.8463, "step": 99840 }, { "epoch": 1.7530153268140243, "grad_norm": 0.04853805008628242, "learning_rate": 0.00014941522012149435, "loss": 0.8434, "step": 99850 }, { "epoch": 1.7531908916940255, "grad_norm": 0.046672044019442666, "learning_rate": 0.00014940549967106234, "loss": 0.8485, "step": 99860 }, { "epoch": 1.7533664565740268, "grad_norm": 0.08292372204144013, "learning_rate": 0.00014939577860732345, "loss": 0.8451, "step": 99870 }, { "epoch": 1.7535420214540283, "grad_norm": 0.06969039489655623, "learning_rate": 0.00014938605693040068, "loss": 0.8398, "step": 99880 }, { "epoch": 1.7537175863340297, "grad_norm": 0.06677551880387926, "learning_rate": 0.00014937633464041735, "loss": 0.8592, "step": 99890 }, { "epoch": 1.7538931512140312, "grad_norm": 0.05475033707739741, "learning_rate": 0.00014936661173749663, "loss": 0.8466, "step": 99900 }, { "epoch": 1.7540687160940327, "grad_norm": 0.05036504940148524, "learning_rate": 0.0001493568882217616, "loss": 0.8501, "step": 99910 }, { "epoch": 1.754244280974034, "grad_norm": 0.05512513175458567, "learning_rate": 0.00014934716409333555, "loss": 0.8523, "step": 99920 }, { "epoch": 1.7544198458540352, "grad_norm": 0.04769026227140717, "learning_rate": 0.00014933743935234165, "loss": 0.8552, "step": 99930 }, { "epoch": 1.7545954107340367, "grad_norm": 0.07635643037061669, "learning_rate": 0.00014932771399890318, "loss": 0.8464, "step": 99940 }, { "epoch": 1.7547709756140382, "grad_norm": 0.04669067418617007, "learning_rate": 0.0001493179880331433, "loss": 0.849, "step": 99950 }, { "epoch": 1.7549465404940396, "grad_norm": 0.08934014879094156, "learning_rate": 0.0001493082614551852, "loss": 0.8477, "step": 99960 }, { "epoch": 1.7551221053740411, "grad_norm": 0.04799352907511265, "learning_rate": 0.0001492985342651522, "loss": 0.8491, "step": 99970 }, { "epoch": 1.7552976702540424, "grad_norm": 0.12135115781997591, "learning_rate": 0.0001492888064631675, "loss": 0.8429, "step": 99980 }, { "epoch": 1.7554732351340436, "grad_norm": 0.04495578884984961, "learning_rate": 0.0001492790780493544, "loss": 0.8436, "step": 99990 }, { "epoch": 1.755648800014045, "grad_norm": 0.048582984534774255, "learning_rate": 0.00014926934902383612, "loss": 0.8468, "step": 100000 }, { "epoch": 1.7558243648940466, "grad_norm": 0.05176372208027687, "learning_rate": 0.00014925961938673594, "loss": 0.8472, "step": 100010 }, { "epoch": 1.755999929774048, "grad_norm": 0.05039436918046425, "learning_rate": 0.00014924988913817713, "loss": 0.8453, "step": 100020 }, { "epoch": 1.7561754946540495, "grad_norm": 0.06165304427261012, "learning_rate": 0.00014924015827828295, "loss": 0.8426, "step": 100030 }, { "epoch": 1.7563510595340508, "grad_norm": 0.07770587040256656, "learning_rate": 0.00014923042680717674, "loss": 0.8433, "step": 100040 }, { "epoch": 1.756526624414052, "grad_norm": 0.05299270185481981, "learning_rate": 0.00014922069472498176, "loss": 0.8473, "step": 100050 }, { "epoch": 1.7567021892940535, "grad_norm": 0.06027716082753529, "learning_rate": 0.00014921096203182136, "loss": 0.8464, "step": 100060 }, { "epoch": 1.756877754174055, "grad_norm": 0.04703811901543525, "learning_rate": 0.00014920122872781884, "loss": 0.8403, "step": 100070 }, { "epoch": 1.7570533190540565, "grad_norm": 0.05599309915794096, "learning_rate": 0.0001491914948130975, "loss": 0.8495, "step": 100080 }, { "epoch": 1.757228883934058, "grad_norm": 0.06350336194644891, "learning_rate": 0.00014918176028778067, "loss": 0.8421, "step": 100090 }, { "epoch": 1.7574044488140592, "grad_norm": 0.05745831353746638, "learning_rate": 0.00014917202515199172, "loss": 0.8437, "step": 100100 }, { "epoch": 1.7575800136940607, "grad_norm": 0.04668624993273089, "learning_rate": 0.00014916228940585398, "loss": 0.8422, "step": 100110 }, { "epoch": 1.757755578574062, "grad_norm": 0.04659845552074928, "learning_rate": 0.00014915255304949083, "loss": 0.8467, "step": 100120 }, { "epoch": 1.7579311434540634, "grad_norm": 0.056292166959542984, "learning_rate": 0.00014914281608302554, "loss": 0.8507, "step": 100130 }, { "epoch": 1.758106708334065, "grad_norm": 0.04449088392484191, "learning_rate": 0.0001491330785065816, "loss": 0.8479, "step": 100140 }, { "epoch": 1.7582822732140664, "grad_norm": 0.058031254030030725, "learning_rate": 0.0001491233403202823, "loss": 0.8507, "step": 100150 }, { "epoch": 1.7584578380940676, "grad_norm": 0.045054691943088476, "learning_rate": 0.00014911360152425107, "loss": 0.8513, "step": 100160 }, { "epoch": 1.7586334029740691, "grad_norm": 0.0567007979379267, "learning_rate": 0.00014910386211861127, "loss": 0.8478, "step": 100170 }, { "epoch": 1.7588089678540704, "grad_norm": 0.07371419084725145, "learning_rate": 0.00014909412210348632, "loss": 0.8359, "step": 100180 }, { "epoch": 1.7589845327340718, "grad_norm": 0.08768198212548785, "learning_rate": 0.00014908438147899966, "loss": 0.85, "step": 100190 }, { "epoch": 1.7591600976140733, "grad_norm": 0.056764835307534775, "learning_rate": 0.00014907464024527462, "loss": 0.8469, "step": 100200 }, { "epoch": 1.7593356624940748, "grad_norm": 0.04886168454998745, "learning_rate": 0.00014906489840243474, "loss": 0.8465, "step": 100210 }, { "epoch": 1.759511227374076, "grad_norm": 0.06850907962987916, "learning_rate": 0.00014905515595060333, "loss": 0.8562, "step": 100220 }, { "epoch": 1.7596867922540775, "grad_norm": 0.07381106251839356, "learning_rate": 0.0001490454128899039, "loss": 0.8425, "step": 100230 }, { "epoch": 1.7598623571340788, "grad_norm": 0.05247618864156942, "learning_rate": 0.00014903566922045992, "loss": 0.8509, "step": 100240 }, { "epoch": 1.7600379220140803, "grad_norm": 0.07085769113607933, "learning_rate": 0.00014902592494239473, "loss": 0.8522, "step": 100250 }, { "epoch": 1.7602134868940817, "grad_norm": 0.04984498185638531, "learning_rate": 0.00014901618005583192, "loss": 0.8511, "step": 100260 }, { "epoch": 1.7603890517740832, "grad_norm": 0.05471020597216379, "learning_rate": 0.00014900643456089487, "loss": 0.8565, "step": 100270 }, { "epoch": 1.7605646166540845, "grad_norm": 0.054512240668882425, "learning_rate": 0.0001489966884577071, "loss": 0.8401, "step": 100280 }, { "epoch": 1.760740181534086, "grad_norm": 0.06112030800062166, "learning_rate": 0.00014898694174639215, "loss": 0.8517, "step": 100290 }, { "epoch": 1.7609157464140872, "grad_norm": 0.04902284573274158, "learning_rate": 0.00014897719442707335, "loss": 0.8518, "step": 100300 }, { "epoch": 1.7610913112940887, "grad_norm": 0.08821569782957299, "learning_rate": 0.00014896744649987439, "loss": 0.8436, "step": 100310 }, { "epoch": 1.7612668761740902, "grad_norm": 0.05248170484461329, "learning_rate": 0.00014895769796491865, "loss": 0.8451, "step": 100320 }, { "epoch": 1.7614424410540916, "grad_norm": 0.05413412495039793, "learning_rate": 0.0001489479488223297, "loss": 0.8538, "step": 100330 }, { "epoch": 1.761618005934093, "grad_norm": 0.06757569046279427, "learning_rate": 0.00014893819907223102, "loss": 0.8455, "step": 100340 }, { "epoch": 1.7617935708140944, "grad_norm": 0.06550038417899663, "learning_rate": 0.0001489284487147462, "loss": 0.8439, "step": 100350 }, { "epoch": 1.7619691356940956, "grad_norm": 0.06068355836460201, "learning_rate": 0.00014891869774999876, "loss": 0.8503, "step": 100360 }, { "epoch": 1.7621447005740971, "grad_norm": 0.05286240038847941, "learning_rate": 0.0001489089461781122, "loss": 0.8485, "step": 100370 }, { "epoch": 1.7623202654540986, "grad_norm": 0.064487530455774, "learning_rate": 0.00014889919399921014, "loss": 0.8466, "step": 100380 }, { "epoch": 1.7624958303341, "grad_norm": 0.07444212447438718, "learning_rate": 0.0001488894412134161, "loss": 0.8421, "step": 100390 }, { "epoch": 1.7626713952141013, "grad_norm": 0.06646532810093449, "learning_rate": 0.00014887968782085367, "loss": 0.8463, "step": 100400 }, { "epoch": 1.7628469600941028, "grad_norm": 0.056911191624414194, "learning_rate": 0.0001488699338216464, "loss": 0.8453, "step": 100410 }, { "epoch": 1.763022524974104, "grad_norm": 0.06293564655400136, "learning_rate": 0.0001488601792159179, "loss": 0.8529, "step": 100420 }, { "epoch": 1.7631980898541055, "grad_norm": 0.08176325476034543, "learning_rate": 0.00014885042400379178, "loss": 0.842, "step": 100430 }, { "epoch": 1.763373654734107, "grad_norm": 0.06608154789623585, "learning_rate": 0.00014884066818539158, "loss": 0.8448, "step": 100440 }, { "epoch": 1.7635492196141085, "grad_norm": 0.047199418899846926, "learning_rate": 0.00014883091176084098, "loss": 0.8475, "step": 100450 }, { "epoch": 1.7637247844941097, "grad_norm": 0.06284066574970712, "learning_rate": 0.00014882115473026356, "loss": 0.8567, "step": 100460 }, { "epoch": 1.7639003493741112, "grad_norm": 0.05371101731923757, "learning_rate": 0.0001488113970937829, "loss": 0.8543, "step": 100470 }, { "epoch": 1.7640759142541125, "grad_norm": 0.05203556876907882, "learning_rate": 0.0001488016388515227, "loss": 0.8449, "step": 100480 }, { "epoch": 1.764251479134114, "grad_norm": 0.06071773680845286, "learning_rate": 0.0001487918800036066, "loss": 0.8511, "step": 100490 }, { "epoch": 1.7644270440141154, "grad_norm": 0.07339285351702078, "learning_rate": 0.0001487821205501582, "loss": 0.845, "step": 100500 }, { "epoch": 1.764602608894117, "grad_norm": 0.054703087783145296, "learning_rate": 0.00014877236049130118, "loss": 0.8449, "step": 100510 }, { "epoch": 1.7647781737741184, "grad_norm": 0.057626888994876176, "learning_rate": 0.0001487625998271592, "loss": 0.8516, "step": 100520 }, { "epoch": 1.7649537386541196, "grad_norm": 0.057985516067943596, "learning_rate": 0.00014875283855785593, "loss": 0.8454, "step": 100530 }, { "epoch": 1.765129303534121, "grad_norm": 0.06469423097783478, "learning_rate": 0.00014874307668351502, "loss": 0.8461, "step": 100540 }, { "epoch": 1.7653048684141224, "grad_norm": 0.06404380760314742, "learning_rate": 0.0001487333142042602, "loss": 0.8487, "step": 100550 }, { "epoch": 1.7654804332941239, "grad_norm": 0.056386218532098194, "learning_rate": 0.00014872355112021513, "loss": 0.8462, "step": 100560 }, { "epoch": 1.7656559981741253, "grad_norm": 0.04321119408331251, "learning_rate": 0.00014871378743150352, "loss": 0.8495, "step": 100570 }, { "epoch": 1.7658315630541268, "grad_norm": 0.064112302646263, "learning_rate": 0.0001487040231382491, "loss": 0.8414, "step": 100580 }, { "epoch": 1.766007127934128, "grad_norm": 0.06425962487751692, "learning_rate": 0.0001486942582405755, "loss": 0.8453, "step": 100590 }, { "epoch": 1.7661826928141293, "grad_norm": 0.062313032238668506, "learning_rate": 0.0001486844927386066, "loss": 0.8392, "step": 100600 }, { "epoch": 1.7663582576941308, "grad_norm": 0.05313284477607172, "learning_rate": 0.00014867472663246598, "loss": 0.843, "step": 100610 }, { "epoch": 1.7665338225741323, "grad_norm": 0.094146501237338, "learning_rate": 0.00014866495992227745, "loss": 0.8558, "step": 100620 }, { "epoch": 1.7667093874541338, "grad_norm": 0.06353137042122002, "learning_rate": 0.00014865519260816477, "loss": 0.8546, "step": 100630 }, { "epoch": 1.7668849523341352, "grad_norm": 0.04700247750601395, "learning_rate": 0.00014864542469025165, "loss": 0.8488, "step": 100640 }, { "epoch": 1.7670605172141365, "grad_norm": 0.061492531136042694, "learning_rate": 0.00014863565616866185, "loss": 0.8388, "step": 100650 }, { "epoch": 1.7672360820941377, "grad_norm": 0.04645354614753374, "learning_rate": 0.00014862588704351914, "loss": 0.842, "step": 100660 }, { "epoch": 1.7674116469741392, "grad_norm": 0.0727807154995062, "learning_rate": 0.00014861611731494735, "loss": 0.846, "step": 100670 }, { "epoch": 1.7675872118541407, "grad_norm": 0.06709063910393628, "learning_rate": 0.0001486063469830702, "loss": 0.841, "step": 100680 }, { "epoch": 1.7677627767341422, "grad_norm": 0.050845295151977644, "learning_rate": 0.00014859657604801156, "loss": 0.8442, "step": 100690 }, { "epoch": 1.7679383416141436, "grad_norm": 0.05003359251736988, "learning_rate": 0.00014858680450989512, "loss": 0.8435, "step": 100700 }, { "epoch": 1.768113906494145, "grad_norm": 0.055903475971935034, "learning_rate": 0.0001485770323688448, "loss": 0.8397, "step": 100710 }, { "epoch": 1.7682894713741462, "grad_norm": 0.04983417034678124, "learning_rate": 0.00014856725962498433, "loss": 0.843, "step": 100720 }, { "epoch": 1.7684650362541476, "grad_norm": 0.058744119984620535, "learning_rate": 0.00014855748627843756, "loss": 0.8454, "step": 100730 }, { "epoch": 1.7686406011341491, "grad_norm": 0.0630039018145193, "learning_rate": 0.00014854771232932835, "loss": 0.8442, "step": 100740 }, { "epoch": 1.7688161660141506, "grad_norm": 0.07607105023826238, "learning_rate": 0.0001485379377777805, "loss": 0.851, "step": 100750 }, { "epoch": 1.768991730894152, "grad_norm": 0.05024397336875885, "learning_rate": 0.00014852816262391784, "loss": 0.8504, "step": 100760 }, { "epoch": 1.7691672957741533, "grad_norm": 0.05067327300843907, "learning_rate": 0.00014851838686786429, "loss": 0.844, "step": 100770 }, { "epoch": 1.7693428606541546, "grad_norm": 0.06241674059093423, "learning_rate": 0.00014850861050974364, "loss": 0.8394, "step": 100780 }, { "epoch": 1.769518425534156, "grad_norm": 0.04794849210018127, "learning_rate": 0.00014849883354967983, "loss": 0.8441, "step": 100790 }, { "epoch": 1.7696939904141575, "grad_norm": 0.06065690823980655, "learning_rate": 0.00014848905598779667, "loss": 0.8436, "step": 100800 }, { "epoch": 1.769869555294159, "grad_norm": 0.04936322614987246, "learning_rate": 0.00014847927782421806, "loss": 0.8474, "step": 100810 }, { "epoch": 1.7700451201741605, "grad_norm": 0.0618279587382625, "learning_rate": 0.00014846949905906792, "loss": 0.8545, "step": 100820 }, { "epoch": 1.7702206850541617, "grad_norm": 0.05910263160699115, "learning_rate": 0.00014845971969247013, "loss": 0.84, "step": 100830 }, { "epoch": 1.7703962499341632, "grad_norm": 0.05147808291324231, "learning_rate": 0.00014844993972454857, "loss": 0.8455, "step": 100840 }, { "epoch": 1.7705718148141645, "grad_norm": 0.06899171740338138, "learning_rate": 0.00014844015915542718, "loss": 0.851, "step": 100850 }, { "epoch": 1.770747379694166, "grad_norm": 0.05472539666052342, "learning_rate": 0.00014843037798522994, "loss": 0.8465, "step": 100860 }, { "epoch": 1.7709229445741674, "grad_norm": 0.05201139708088321, "learning_rate": 0.00014842059621408067, "loss": 0.8539, "step": 100870 }, { "epoch": 1.771098509454169, "grad_norm": 0.0590639585639497, "learning_rate": 0.00014841081384210338, "loss": 0.8392, "step": 100880 }, { "epoch": 1.7712740743341702, "grad_norm": 0.06235000952710594, "learning_rate": 0.00014840103086942197, "loss": 0.849, "step": 100890 }, { "epoch": 1.7714496392141716, "grad_norm": 0.05638156975131798, "learning_rate": 0.0001483912472961604, "loss": 0.8471, "step": 100900 }, { "epoch": 1.771625204094173, "grad_norm": 0.05578005632932971, "learning_rate": 0.00014838146312244269, "loss": 0.8508, "step": 100910 }, { "epoch": 1.7718007689741744, "grad_norm": 0.04523307547013058, "learning_rate": 0.00014837167834839276, "loss": 0.8493, "step": 100920 }, { "epoch": 1.7719763338541759, "grad_norm": 0.07163411894290149, "learning_rate": 0.00014836189297413454, "loss": 0.8447, "step": 100930 }, { "epoch": 1.7721518987341773, "grad_norm": 0.06775934715220515, "learning_rate": 0.0001483521069997921, "loss": 0.8413, "step": 100940 }, { "epoch": 1.7723274636141786, "grad_norm": 0.07516820385198386, "learning_rate": 0.00014834232042548938, "loss": 0.8499, "step": 100950 }, { "epoch": 1.77250302849418, "grad_norm": 0.04996942122537086, "learning_rate": 0.00014833253325135037, "loss": 0.8445, "step": 100960 }, { "epoch": 1.7726785933741813, "grad_norm": 0.056483648982587294, "learning_rate": 0.00014832274547749912, "loss": 0.8452, "step": 100970 }, { "epoch": 1.7728541582541828, "grad_norm": 0.05585237917456498, "learning_rate": 0.00014831295710405958, "loss": 0.8546, "step": 100980 }, { "epoch": 1.7730297231341843, "grad_norm": 0.08726791194681192, "learning_rate": 0.00014830316813115583, "loss": 0.8459, "step": 100990 }, { "epoch": 1.7732052880141858, "grad_norm": 0.05581989961921171, "learning_rate": 0.0001482933785589119, "loss": 0.8486, "step": 101000 }, { "epoch": 1.773380852894187, "grad_norm": 0.08220749099354357, "learning_rate": 0.00014828358838745175, "loss": 0.8514, "step": 101010 }, { "epoch": 1.7735564177741885, "grad_norm": 0.045175673862986626, "learning_rate": 0.0001482737976168995, "loss": 0.8485, "step": 101020 }, { "epoch": 1.7737319826541897, "grad_norm": 0.049875292374703506, "learning_rate": 0.00014826400624737916, "loss": 0.8509, "step": 101030 }, { "epoch": 1.7739075475341912, "grad_norm": 0.06044144183364798, "learning_rate": 0.00014825421427901484, "loss": 0.8501, "step": 101040 }, { "epoch": 1.7740831124141927, "grad_norm": 0.06702917999180036, "learning_rate": 0.00014824442171193053, "loss": 0.84, "step": 101050 }, { "epoch": 1.7742586772941942, "grad_norm": 0.07688567253273439, "learning_rate": 0.00014823462854625033, "loss": 0.8431, "step": 101060 }, { "epoch": 1.7744342421741954, "grad_norm": 0.048306066993318714, "learning_rate": 0.00014822483478209834, "loss": 0.8437, "step": 101070 }, { "epoch": 1.774609807054197, "grad_norm": 0.056960795923288246, "learning_rate": 0.0001482150404195987, "loss": 0.853, "step": 101080 }, { "epoch": 1.7747853719341982, "grad_norm": 0.0684016241423637, "learning_rate": 0.00014820524545887542, "loss": 0.8472, "step": 101090 }, { "epoch": 1.7749609368141996, "grad_norm": 0.0548586571888273, "learning_rate": 0.0001481954499000526, "loss": 0.8449, "step": 101100 }, { "epoch": 1.7751365016942011, "grad_norm": 0.054664972028250304, "learning_rate": 0.00014818565374325441, "loss": 0.836, "step": 101110 }, { "epoch": 1.7753120665742026, "grad_norm": 0.06549195241553156, "learning_rate": 0.00014817585698860495, "loss": 0.8451, "step": 101120 }, { "epoch": 1.7754876314542039, "grad_norm": 0.0615898116067138, "learning_rate": 0.0001481660596362283, "loss": 0.8455, "step": 101130 }, { "epoch": 1.7756631963342053, "grad_norm": 0.050525238111119766, "learning_rate": 0.0001481562616862487, "loss": 0.8359, "step": 101140 }, { "epoch": 1.7758387612142066, "grad_norm": 0.06543317997384115, "learning_rate": 0.00014814646313879016, "loss": 0.8517, "step": 101150 }, { "epoch": 1.776014326094208, "grad_norm": 0.07318967959777525, "learning_rate": 0.00014813666399397695, "loss": 0.846, "step": 101160 }, { "epoch": 1.7761898909742095, "grad_norm": 0.0501240776984155, "learning_rate": 0.00014812686425193315, "loss": 0.851, "step": 101170 }, { "epoch": 1.776365455854211, "grad_norm": 0.09731336800374236, "learning_rate": 0.00014811706391278296, "loss": 0.8408, "step": 101180 }, { "epoch": 1.7765410207342123, "grad_norm": 0.072232811738798, "learning_rate": 0.00014810726297665052, "loss": 0.839, "step": 101190 }, { "epoch": 1.7767165856142138, "grad_norm": 0.09995607463330111, "learning_rate": 0.00014809746144366003, "loss": 0.8495, "step": 101200 }, { "epoch": 1.776892150494215, "grad_norm": 0.0660836264462974, "learning_rate": 0.0001480876593139357, "loss": 0.8521, "step": 101210 }, { "epoch": 1.7770677153742165, "grad_norm": 0.06392327709092102, "learning_rate": 0.0001480778565876017, "loss": 0.851, "step": 101220 }, { "epoch": 1.777243280254218, "grad_norm": 0.06581778805393063, "learning_rate": 0.00014806805326478224, "loss": 0.8514, "step": 101230 }, { "epoch": 1.7774188451342194, "grad_norm": 0.04420255280691539, "learning_rate": 0.0001480582493456015, "loss": 0.8537, "step": 101240 }, { "epoch": 1.777594410014221, "grad_norm": 0.07008393464198444, "learning_rate": 0.00014804844483018372, "loss": 0.8548, "step": 101250 }, { "epoch": 1.7777699748942222, "grad_norm": 0.059887344745140514, "learning_rate": 0.00014803863971865315, "loss": 0.8454, "step": 101260 }, { "epoch": 1.7779455397742234, "grad_norm": 0.05046782718249097, "learning_rate": 0.000148028834011134, "loss": 0.853, "step": 101270 }, { "epoch": 1.778121104654225, "grad_norm": 0.05420184002693551, "learning_rate": 0.0001480190277077505, "loss": 0.85, "step": 101280 }, { "epoch": 1.7782966695342264, "grad_norm": 0.09376333104647253, "learning_rate": 0.0001480092208086269, "loss": 0.8494, "step": 101290 }, { "epoch": 1.7784722344142279, "grad_norm": 0.04946686505756815, "learning_rate": 0.00014799941331388746, "loss": 0.8411, "step": 101300 }, { "epoch": 1.7786477992942293, "grad_norm": 0.06190400029455918, "learning_rate": 0.00014798960522365647, "loss": 0.8494, "step": 101310 }, { "epoch": 1.7788233641742306, "grad_norm": 0.05823569898665038, "learning_rate": 0.00014797979653805816, "loss": 0.8462, "step": 101320 }, { "epoch": 1.7789989290542318, "grad_norm": 0.0718487488360185, "learning_rate": 0.00014796998725721683, "loss": 0.8535, "step": 101330 }, { "epoch": 1.7791744939342333, "grad_norm": 0.061922170145356524, "learning_rate": 0.00014796017738125675, "loss": 0.8549, "step": 101340 }, { "epoch": 1.7793500588142348, "grad_norm": 0.04583549245291089, "learning_rate": 0.0001479503669103022, "loss": 0.8438, "step": 101350 }, { "epoch": 1.7795256236942363, "grad_norm": 0.047970068895734845, "learning_rate": 0.00014794055584447754, "loss": 0.8459, "step": 101360 }, { "epoch": 1.7797011885742378, "grad_norm": 0.049584863851275904, "learning_rate": 0.00014793074418390704, "loss": 0.8432, "step": 101370 }, { "epoch": 1.779876753454239, "grad_norm": 0.05152334878264013, "learning_rate": 0.00014792093192871503, "loss": 0.852, "step": 101380 }, { "epoch": 1.7800523183342403, "grad_norm": 0.06557856343106847, "learning_rate": 0.0001479111190790258, "loss": 0.8493, "step": 101390 }, { "epoch": 1.7802278832142417, "grad_norm": 0.06324425931943413, "learning_rate": 0.00014790130563496371, "loss": 0.8396, "step": 101400 }, { "epoch": 1.7804034480942432, "grad_norm": 0.060978057819076475, "learning_rate": 0.00014789149159665304, "loss": 0.8448, "step": 101410 }, { "epoch": 1.7805790129742447, "grad_norm": 0.05403525395492698, "learning_rate": 0.00014788167696421826, "loss": 0.8471, "step": 101420 }, { "epoch": 1.7807545778542462, "grad_norm": 0.09556747109066605, "learning_rate": 0.0001478718617377836, "loss": 0.8463, "step": 101430 }, { "epoch": 1.7809301427342474, "grad_norm": 0.06018654051923663, "learning_rate": 0.0001478620459174735, "loss": 0.8451, "step": 101440 }, { "epoch": 1.7811057076142487, "grad_norm": 0.06430974489171216, "learning_rate": 0.00014785222950341228, "loss": 0.8408, "step": 101450 }, { "epoch": 1.7812812724942502, "grad_norm": 0.0559508951009287, "learning_rate": 0.00014784241249572432, "loss": 0.8451, "step": 101460 }, { "epoch": 1.7814568373742516, "grad_norm": 0.055620714110868504, "learning_rate": 0.00014783259489453403, "loss": 0.8477, "step": 101470 }, { "epoch": 1.7816324022542531, "grad_norm": 0.06577602787601951, "learning_rate": 0.0001478227766999658, "loss": 0.8489, "step": 101480 }, { "epoch": 1.7818079671342546, "grad_norm": 0.05292902977926135, "learning_rate": 0.000147812957912144, "loss": 0.8457, "step": 101490 }, { "epoch": 1.7819835320142559, "grad_norm": 0.04397960627941302, "learning_rate": 0.00014780313853119306, "loss": 0.8446, "step": 101500 }, { "epoch": 1.782159096894257, "grad_norm": 0.06679736825268157, "learning_rate": 0.00014779331855723735, "loss": 0.8475, "step": 101510 }, { "epoch": 1.7823346617742586, "grad_norm": 0.04961168977111472, "learning_rate": 0.00014778349799040138, "loss": 0.8393, "step": 101520 }, { "epoch": 1.78251022665426, "grad_norm": 0.06454017177268961, "learning_rate": 0.00014777367683080947, "loss": 0.8407, "step": 101530 }, { "epoch": 1.7826857915342615, "grad_norm": 0.07343562239440282, "learning_rate": 0.00014776385507858617, "loss": 0.8426, "step": 101540 }, { "epoch": 1.782861356414263, "grad_norm": 0.06359886007828307, "learning_rate": 0.00014775403273385585, "loss": 0.8467, "step": 101550 }, { "epoch": 1.7830369212942643, "grad_norm": 0.06024307554375262, "learning_rate": 0.00014774420979674294, "loss": 0.841, "step": 101560 }, { "epoch": 1.7832124861742658, "grad_norm": 0.0530713906080161, "learning_rate": 0.00014773438626737196, "loss": 0.8457, "step": 101570 }, { "epoch": 1.783388051054267, "grad_norm": 0.059397641105539786, "learning_rate": 0.00014772456214586733, "loss": 0.8422, "step": 101580 }, { "epoch": 1.7835636159342685, "grad_norm": 0.07803799157000206, "learning_rate": 0.00014771473743235353, "loss": 0.8459, "step": 101590 }, { "epoch": 1.78373918081427, "grad_norm": 0.06235103998629851, "learning_rate": 0.0001477049121269551, "loss": 0.8467, "step": 101600 }, { "epoch": 1.7839147456942714, "grad_norm": 0.06034672750187838, "learning_rate": 0.00014769508622979643, "loss": 0.848, "step": 101610 }, { "epoch": 1.7840903105742727, "grad_norm": 0.05770953330332583, "learning_rate": 0.0001476852597410021, "loss": 0.849, "step": 101620 }, { "epoch": 1.7842658754542742, "grad_norm": 0.055126047522359174, "learning_rate": 0.0001476754326606966, "loss": 0.8421, "step": 101630 }, { "epoch": 1.7844414403342754, "grad_norm": 0.05996327242886333, "learning_rate": 0.00014766560498900443, "loss": 0.8481, "step": 101640 }, { "epoch": 1.784617005214277, "grad_norm": 0.05246913226024682, "learning_rate": 0.00014765577672605008, "loss": 0.8399, "step": 101650 }, { "epoch": 1.7847925700942784, "grad_norm": 0.05494524213105213, "learning_rate": 0.00014764594787195807, "loss": 0.8473, "step": 101660 }, { "epoch": 1.7849681349742799, "grad_norm": 0.060479923560556044, "learning_rate": 0.000147636118426853, "loss": 0.8524, "step": 101670 }, { "epoch": 1.7851436998542811, "grad_norm": 0.05372316787884148, "learning_rate": 0.00014762628839085933, "loss": 0.8521, "step": 101680 }, { "epoch": 1.7853192647342826, "grad_norm": 0.06939730938543813, "learning_rate": 0.00014761645776410168, "loss": 0.8438, "step": 101690 }, { "epoch": 1.7854948296142839, "grad_norm": 0.07083962537369573, "learning_rate": 0.00014760662654670457, "loss": 0.8404, "step": 101700 }, { "epoch": 1.7856703944942853, "grad_norm": 0.05248351872528578, "learning_rate": 0.00014759679473879257, "loss": 0.8519, "step": 101710 }, { "epoch": 1.7858459593742868, "grad_norm": 0.07193517357938481, "learning_rate": 0.00014758696234049026, "loss": 0.8435, "step": 101720 }, { "epoch": 1.7860215242542883, "grad_norm": 0.05074871076308554, "learning_rate": 0.0001475771293519222, "loss": 0.8492, "step": 101730 }, { "epoch": 1.7861970891342895, "grad_norm": 0.06581942303531119, "learning_rate": 0.00014756729577321297, "loss": 0.8468, "step": 101740 }, { "epoch": 1.786372654014291, "grad_norm": 0.07734018984056511, "learning_rate": 0.00014755746160448717, "loss": 0.846, "step": 101750 }, { "epoch": 1.7865482188942923, "grad_norm": 0.06453578337164805, "learning_rate": 0.00014754762684586947, "loss": 0.8476, "step": 101760 }, { "epoch": 1.7867237837742938, "grad_norm": 0.05074343294943287, "learning_rate": 0.00014753779149748436, "loss": 0.8399, "step": 101770 }, { "epoch": 1.7868993486542952, "grad_norm": 0.05114710617510233, "learning_rate": 0.00014752795555945657, "loss": 0.8468, "step": 101780 }, { "epoch": 1.7870749135342967, "grad_norm": 0.056488428835735904, "learning_rate": 0.0001475181190319106, "loss": 0.8475, "step": 101790 }, { "epoch": 1.787250478414298, "grad_norm": 0.051217428808915104, "learning_rate": 0.00014750828191497116, "loss": 0.8555, "step": 101800 }, { "epoch": 1.7874260432942994, "grad_norm": 0.05375236945795411, "learning_rate": 0.0001474984442087629, "loss": 0.8453, "step": 101810 }, { "epoch": 1.7876016081743007, "grad_norm": 0.05269378744140611, "learning_rate": 0.00014748860591341044, "loss": 0.8431, "step": 101820 }, { "epoch": 1.7877771730543022, "grad_norm": 0.04814050045848598, "learning_rate": 0.00014747876702903842, "loss": 0.8438, "step": 101830 }, { "epoch": 1.7879527379343036, "grad_norm": 0.06697053664042446, "learning_rate": 0.00014746892755577152, "loss": 0.8416, "step": 101840 }, { "epoch": 1.7881283028143051, "grad_norm": 0.043050313054701646, "learning_rate": 0.00014745908749373438, "loss": 0.8401, "step": 101850 }, { "epoch": 1.7883038676943064, "grad_norm": 0.07047753940700073, "learning_rate": 0.0001474492468430517, "loss": 0.8533, "step": 101860 }, { "epoch": 1.7884794325743079, "grad_norm": 0.06298023151847378, "learning_rate": 0.0001474394056038482, "loss": 0.8464, "step": 101870 }, { "epoch": 1.7886549974543091, "grad_norm": 0.06643951785659453, "learning_rate": 0.0001474295637762485, "loss": 0.8461, "step": 101880 }, { "epoch": 1.7888305623343106, "grad_norm": 0.06444923881257214, "learning_rate": 0.00014741972136037733, "loss": 0.8406, "step": 101890 }, { "epoch": 1.789006127214312, "grad_norm": 0.05757107251178884, "learning_rate": 0.0001474098783563594, "loss": 0.8459, "step": 101900 }, { "epoch": 1.7891816920943135, "grad_norm": 0.056272644738241784, "learning_rate": 0.00014740003476431944, "loss": 0.8576, "step": 101910 }, { "epoch": 1.7893572569743148, "grad_norm": 0.0770077432479371, "learning_rate": 0.00014739019058438213, "loss": 0.8503, "step": 101920 }, { "epoch": 1.7895328218543163, "grad_norm": 0.05882316841005904, "learning_rate": 0.0001473803458166722, "loss": 0.8403, "step": 101930 }, { "epoch": 1.7897083867343175, "grad_norm": 0.055109512564710236, "learning_rate": 0.00014737050046131442, "loss": 0.841, "step": 101940 }, { "epoch": 1.789883951614319, "grad_norm": 0.04839498738869534, "learning_rate": 0.0001473606545184335, "loss": 0.8499, "step": 101950 }, { "epoch": 1.7900595164943205, "grad_norm": 0.0639570648007624, "learning_rate": 0.0001473508079881542, "loss": 0.8454, "step": 101960 }, { "epoch": 1.790235081374322, "grad_norm": 0.05862913615571413, "learning_rate": 0.00014734096087060127, "loss": 0.8565, "step": 101970 }, { "epoch": 1.7904106462543234, "grad_norm": 0.05466912141904713, "learning_rate": 0.0001473311131658995, "loss": 0.8447, "step": 101980 }, { "epoch": 1.7905862111343247, "grad_norm": 0.05277488870897125, "learning_rate": 0.00014732126487417363, "loss": 0.8503, "step": 101990 }, { "epoch": 1.790761776014326, "grad_norm": 0.0754790178391736, "learning_rate": 0.0001473114159955485, "loss": 0.8532, "step": 102000 }, { "epoch": 1.7909373408943274, "grad_norm": 0.060038014133459, "learning_rate": 0.00014730156653014882, "loss": 0.8453, "step": 102010 }, { "epoch": 1.791112905774329, "grad_norm": 0.05223794767526734, "learning_rate": 0.0001472917164780994, "loss": 0.8389, "step": 102020 }, { "epoch": 1.7912884706543304, "grad_norm": 0.07767486092138637, "learning_rate": 0.0001472818658395251, "loss": 0.8506, "step": 102030 }, { "epoch": 1.7914640355343319, "grad_norm": 0.06351692640950389, "learning_rate": 0.00014727201461455067, "loss": 0.8454, "step": 102040 }, { "epoch": 1.7916396004143331, "grad_norm": 0.05823319581637103, "learning_rate": 0.00014726216280330095, "loss": 0.8525, "step": 102050 }, { "epoch": 1.7918151652943344, "grad_norm": 0.04903576617211271, "learning_rate": 0.00014725231040590078, "loss": 0.8452, "step": 102060 }, { "epoch": 1.7919907301743359, "grad_norm": 0.04740030860691719, "learning_rate": 0.00014724245742247494, "loss": 0.8471, "step": 102070 }, { "epoch": 1.7921662950543373, "grad_norm": 0.04719420397710098, "learning_rate": 0.00014723260385314833, "loss": 0.8451, "step": 102080 }, { "epoch": 1.7923418599343388, "grad_norm": 0.06674261880379148, "learning_rate": 0.00014722274969804574, "loss": 0.8489, "step": 102090 }, { "epoch": 1.7925174248143403, "grad_norm": 0.05430970113027364, "learning_rate": 0.00014721289495729206, "loss": 0.8474, "step": 102100 }, { "epoch": 1.7926929896943415, "grad_norm": 0.06357294563170299, "learning_rate": 0.0001472030396310122, "loss": 0.8431, "step": 102110 }, { "epoch": 1.7928685545743428, "grad_norm": 0.055814020357151245, "learning_rate": 0.0001471931837193309, "loss": 0.8479, "step": 102120 }, { "epoch": 1.7930441194543443, "grad_norm": 0.05706718441674296, "learning_rate": 0.00014718332722237314, "loss": 0.854, "step": 102130 }, { "epoch": 1.7932196843343458, "grad_norm": 0.06901388599966833, "learning_rate": 0.00014717347014026378, "loss": 0.8461, "step": 102140 }, { "epoch": 1.7933952492143472, "grad_norm": 0.04366296156272304, "learning_rate": 0.00014716361247312768, "loss": 0.8443, "step": 102150 }, { "epoch": 1.7935708140943487, "grad_norm": 0.054147528180751525, "learning_rate": 0.00014715375422108976, "loss": 0.8498, "step": 102160 }, { "epoch": 1.79374637897435, "grad_norm": 0.05788233516747634, "learning_rate": 0.00014714389538427494, "loss": 0.8445, "step": 102170 }, { "epoch": 1.7939219438543512, "grad_norm": 0.0817055486579092, "learning_rate": 0.00014713403596280813, "loss": 0.8434, "step": 102180 }, { "epoch": 1.7940975087343527, "grad_norm": 0.047536495711713575, "learning_rate": 0.00014712417595681425, "loss": 0.8505, "step": 102190 }, { "epoch": 1.7942730736143542, "grad_norm": 0.048547500733285634, "learning_rate": 0.00014711431536641823, "loss": 0.8412, "step": 102200 }, { "epoch": 1.7944486384943557, "grad_norm": 0.047268529633787436, "learning_rate": 0.00014710445419174496, "loss": 0.8474, "step": 102210 }, { "epoch": 1.7946242033743571, "grad_norm": 0.06132348267493282, "learning_rate": 0.00014709459243291944, "loss": 0.8421, "step": 102220 }, { "epoch": 1.7947997682543584, "grad_norm": 0.05708710685207058, "learning_rate": 0.0001470847300900666, "loss": 0.8454, "step": 102230 }, { "epoch": 1.7949753331343596, "grad_norm": 0.05154143359238834, "learning_rate": 0.00014707486716331137, "loss": 0.8478, "step": 102240 }, { "epoch": 1.7951508980143611, "grad_norm": 0.05925004353643488, "learning_rate": 0.00014706500365277878, "loss": 0.85, "step": 102250 }, { "epoch": 1.7953264628943626, "grad_norm": 0.0726254967199455, "learning_rate": 0.00014705513955859373, "loss": 0.8412, "step": 102260 }, { "epoch": 1.795502027774364, "grad_norm": 0.06378930706408324, "learning_rate": 0.00014704527488088127, "loss": 0.8483, "step": 102270 }, { "epoch": 1.7956775926543656, "grad_norm": 0.06104439701786397, "learning_rate": 0.00014703540961976636, "loss": 0.846, "step": 102280 }, { "epoch": 1.7958531575343668, "grad_norm": 0.0632376086451732, "learning_rate": 0.00014702554377537394, "loss": 0.8576, "step": 102290 }, { "epoch": 1.7960287224143683, "grad_norm": 0.06101145643945044, "learning_rate": 0.0001470156773478291, "loss": 0.8496, "step": 102300 }, { "epoch": 1.7962042872943695, "grad_norm": 0.054766342471326585, "learning_rate": 0.0001470058103372568, "loss": 0.8492, "step": 102310 }, { "epoch": 1.796379852174371, "grad_norm": 0.057436648538460804, "learning_rate": 0.00014699594274378204, "loss": 0.8431, "step": 102320 }, { "epoch": 1.7965554170543725, "grad_norm": 0.0660988485255681, "learning_rate": 0.0001469860745675299, "loss": 0.8512, "step": 102330 }, { "epoch": 1.796730981934374, "grad_norm": 0.07351848577306126, "learning_rate": 0.0001469762058086254, "loss": 0.8508, "step": 102340 }, { "epoch": 1.7969065468143752, "grad_norm": 0.07747063522398563, "learning_rate": 0.00014696633646719355, "loss": 0.8471, "step": 102350 }, { "epoch": 1.7970821116943767, "grad_norm": 0.07062377756482936, "learning_rate": 0.00014695646654335937, "loss": 0.8531, "step": 102360 }, { "epoch": 1.797257676574378, "grad_norm": 0.04703631601040788, "learning_rate": 0.00014694659603724797, "loss": 0.8496, "step": 102370 }, { "epoch": 1.7974332414543794, "grad_norm": 0.06556615885234554, "learning_rate": 0.0001469367249489844, "loss": 0.8449, "step": 102380 }, { "epoch": 1.797608806334381, "grad_norm": 0.0655673188562377, "learning_rate": 0.0001469268532786937, "loss": 0.8469, "step": 102390 }, { "epoch": 1.7977843712143824, "grad_norm": 0.0559139939919967, "learning_rate": 0.000146916981026501, "loss": 0.8418, "step": 102400 }, { "epoch": 1.7979599360943836, "grad_norm": 0.050974723687880454, "learning_rate": 0.00014690710819253134, "loss": 0.8515, "step": 102410 }, { "epoch": 1.7981355009743851, "grad_norm": 0.07335299773173554, "learning_rate": 0.00014689723477690983, "loss": 0.8439, "step": 102420 }, { "epoch": 1.7983110658543864, "grad_norm": 0.04932177645946533, "learning_rate": 0.0001468873607797615, "loss": 0.8409, "step": 102430 }, { "epoch": 1.7984866307343879, "grad_norm": 0.05934820237665754, "learning_rate": 0.00014687748620121157, "loss": 0.8456, "step": 102440 }, { "epoch": 1.7986621956143893, "grad_norm": 0.05785393724458521, "learning_rate": 0.00014686761104138512, "loss": 0.8464, "step": 102450 }, { "epoch": 1.7988377604943908, "grad_norm": 0.0726213398144918, "learning_rate": 0.0001468577353004072, "loss": 0.8523, "step": 102460 }, { "epoch": 1.799013325374392, "grad_norm": 0.04414846024966255, "learning_rate": 0.000146847858978403, "loss": 0.8456, "step": 102470 }, { "epoch": 1.7991888902543935, "grad_norm": 0.06991075043830469, "learning_rate": 0.00014683798207549764, "loss": 0.8392, "step": 102480 }, { "epoch": 1.7993644551343948, "grad_norm": 0.0519246356328407, "learning_rate": 0.00014682810459181627, "loss": 0.8544, "step": 102490 }, { "epoch": 1.7995400200143963, "grad_norm": 0.06411567447937468, "learning_rate": 0.00014681822652748403, "loss": 0.8509, "step": 102500 }, { "epoch": 1.7997155848943978, "grad_norm": 0.06775988924371534, "learning_rate": 0.00014680834788262606, "loss": 0.8368, "step": 102510 }, { "epoch": 1.7998911497743992, "grad_norm": 0.06566946344125431, "learning_rate": 0.00014679846865736757, "loss": 0.8467, "step": 102520 }, { "epoch": 1.8000667146544005, "grad_norm": 0.05488017583106747, "learning_rate": 0.00014678858885183369, "loss": 0.8436, "step": 102530 }, { "epoch": 1.800242279534402, "grad_norm": 0.059157672065591305, "learning_rate": 0.00014677870846614962, "loss": 0.8458, "step": 102540 }, { "epoch": 1.8004178444144032, "grad_norm": 0.06247354606468738, "learning_rate": 0.0001467688275004405, "loss": 0.8569, "step": 102550 }, { "epoch": 1.8005934092944047, "grad_norm": 0.14427974213454114, "learning_rate": 0.00014675894595483164, "loss": 0.8502, "step": 102560 }, { "epoch": 1.8007689741744062, "grad_norm": 0.05669505632045852, "learning_rate": 0.00014674906382944813, "loss": 0.8511, "step": 102570 }, { "epoch": 1.8009445390544077, "grad_norm": 0.054482124949102026, "learning_rate": 0.0001467391811244152, "loss": 0.8443, "step": 102580 }, { "epoch": 1.801120103934409, "grad_norm": 0.047412894677076635, "learning_rate": 0.0001467292978398581, "loss": 0.8481, "step": 102590 }, { "epoch": 1.8012956688144104, "grad_norm": 0.06692624383561993, "learning_rate": 0.00014671941397590202, "loss": 0.8507, "step": 102600 }, { "epoch": 1.8014712336944116, "grad_norm": 0.05676479853088661, "learning_rate": 0.0001467095295326722, "loss": 0.8469, "step": 102610 }, { "epoch": 1.8016467985744131, "grad_norm": 0.06421243798284011, "learning_rate": 0.00014669964451029388, "loss": 0.8424, "step": 102620 }, { "epoch": 1.8018223634544146, "grad_norm": 0.050277080055847745, "learning_rate": 0.0001466897589088923, "loss": 0.8488, "step": 102630 }, { "epoch": 1.801997928334416, "grad_norm": 0.043581039997295005, "learning_rate": 0.00014667987272859275, "loss": 0.845, "step": 102640 }, { "epoch": 1.8021734932144173, "grad_norm": 0.06576789366523701, "learning_rate": 0.00014666998596952043, "loss": 0.8503, "step": 102650 }, { "epoch": 1.8023490580944188, "grad_norm": 0.05689923058792311, "learning_rate": 0.00014666009863180063, "loss": 0.8399, "step": 102660 }, { "epoch": 1.80252462297442, "grad_norm": 0.04834007031991282, "learning_rate": 0.00014665021071555865, "loss": 0.8497, "step": 102670 }, { "epoch": 1.8027001878544215, "grad_norm": 0.0601930436144198, "learning_rate": 0.00014664032222091972, "loss": 0.8502, "step": 102680 }, { "epoch": 1.802875752734423, "grad_norm": 0.05294405376999718, "learning_rate": 0.0001466304331480092, "loss": 0.8399, "step": 102690 }, { "epoch": 1.8030513176144245, "grad_norm": 0.07116214471597206, "learning_rate": 0.0001466205434969523, "loss": 0.8462, "step": 102700 }, { "epoch": 1.803226882494426, "grad_norm": 0.07211411370570404, "learning_rate": 0.00014661065326787445, "loss": 0.8469, "step": 102710 }, { "epoch": 1.8034024473744272, "grad_norm": 0.06452046660009543, "learning_rate": 0.0001466007624609008, "loss": 0.8512, "step": 102720 }, { "epoch": 1.8035780122544285, "grad_norm": 0.05218353548141175, "learning_rate": 0.00014659087107615678, "loss": 0.85, "step": 102730 }, { "epoch": 1.80375357713443, "grad_norm": 0.07336222857370284, "learning_rate": 0.0001465809791137677, "loss": 0.8504, "step": 102740 }, { "epoch": 1.8039291420144314, "grad_norm": 0.05059859873379582, "learning_rate": 0.00014657108657385888, "loss": 0.8504, "step": 102750 }, { "epoch": 1.804104706894433, "grad_norm": 0.05640139239242346, "learning_rate": 0.00014656119345655565, "loss": 0.852, "step": 102760 }, { "epoch": 1.8042802717744344, "grad_norm": 0.08111423002215667, "learning_rate": 0.00014655129976198336, "loss": 0.8428, "step": 102770 }, { "epoch": 1.8044558366544357, "grad_norm": 0.06986590586677832, "learning_rate": 0.00014654140549026737, "loss": 0.8529, "step": 102780 }, { "epoch": 1.804631401534437, "grad_norm": 0.051300561800302416, "learning_rate": 0.00014653151064153308, "loss": 0.8397, "step": 102790 }, { "epoch": 1.8048069664144384, "grad_norm": 0.059081122853188686, "learning_rate": 0.00014652161521590577, "loss": 0.8508, "step": 102800 }, { "epoch": 1.8049825312944399, "grad_norm": 0.05965470119356715, "learning_rate": 0.0001465117192135109, "loss": 0.842, "step": 102810 }, { "epoch": 1.8051580961744413, "grad_norm": 0.06652117929138983, "learning_rate": 0.0001465018226344738, "loss": 0.8488, "step": 102820 }, { "epoch": 1.8053336610544428, "grad_norm": 0.06043724070397839, "learning_rate": 0.00014649192547891993, "loss": 0.8454, "step": 102830 }, { "epoch": 1.805509225934444, "grad_norm": 0.06515304983465933, "learning_rate": 0.0001464820277469746, "loss": 0.842, "step": 102840 }, { "epoch": 1.8056847908144453, "grad_norm": 0.05515946603659303, "learning_rate": 0.00014647212943876329, "loss": 0.8512, "step": 102850 }, { "epoch": 1.8058603556944468, "grad_norm": 0.05125553243979122, "learning_rate": 0.00014646223055441143, "loss": 0.85, "step": 102860 }, { "epoch": 1.8060359205744483, "grad_norm": 0.07257174827967791, "learning_rate": 0.0001464523310940443, "loss": 0.851, "step": 102870 }, { "epoch": 1.8062114854544498, "grad_norm": 0.050301528848681804, "learning_rate": 0.00014644243105778746, "loss": 0.8526, "step": 102880 }, { "epoch": 1.8063870503344512, "grad_norm": 0.07874986404392209, "learning_rate": 0.00014643253044576633, "loss": 0.8467, "step": 102890 }, { "epoch": 1.8065626152144525, "grad_norm": 0.08173423673495543, "learning_rate": 0.00014642262925810632, "loss": 0.8407, "step": 102900 }, { "epoch": 1.8067381800944537, "grad_norm": 0.048612812189853055, "learning_rate": 0.0001464127274949329, "loss": 0.8343, "step": 102910 }, { "epoch": 1.8069137449744552, "grad_norm": 0.06180629965574668, "learning_rate": 0.00014640282515637152, "loss": 0.8472, "step": 102920 }, { "epoch": 1.8070893098544567, "grad_norm": 0.052224699014824134, "learning_rate": 0.00014639292224254765, "loss": 0.8427, "step": 102930 }, { "epoch": 1.8072648747344582, "grad_norm": 0.051427798024711154, "learning_rate": 0.00014638301875358672, "loss": 0.8473, "step": 102940 }, { "epoch": 1.8074404396144597, "grad_norm": 0.05434897718518453, "learning_rate": 0.00014637311468961427, "loss": 0.8423, "step": 102950 }, { "epoch": 1.807616004494461, "grad_norm": 0.10078962652361632, "learning_rate": 0.00014636321005075578, "loss": 0.8488, "step": 102960 }, { "epoch": 1.8077915693744622, "grad_norm": 0.056048542543374734, "learning_rate": 0.0001463533048371367, "loss": 0.8436, "step": 102970 }, { "epoch": 1.8079671342544636, "grad_norm": 0.07119010917231683, "learning_rate": 0.00014634339904888257, "loss": 0.845, "step": 102980 }, { "epoch": 1.8081426991344651, "grad_norm": 0.07040376552692931, "learning_rate": 0.00014633349268611885, "loss": 0.8405, "step": 102990 }, { "epoch": 1.8083182640144666, "grad_norm": 0.05576101393203609, "learning_rate": 0.00014632358574897112, "loss": 0.8482, "step": 103000 }, { "epoch": 1.808493828894468, "grad_norm": 0.04913832419290911, "learning_rate": 0.00014631367823756487, "loss": 0.845, "step": 103010 }, { "epoch": 1.8086693937744693, "grad_norm": 0.04826688961638336, "learning_rate": 0.00014630377015202562, "loss": 0.844, "step": 103020 }, { "epoch": 1.8088449586544708, "grad_norm": 0.08105926824640003, "learning_rate": 0.00014629386149247895, "loss": 0.8504, "step": 103030 }, { "epoch": 1.809020523534472, "grad_norm": 0.060588607808499784, "learning_rate": 0.00014628395225905035, "loss": 0.8412, "step": 103040 }, { "epoch": 1.8091960884144735, "grad_norm": 0.04641771206057309, "learning_rate": 0.0001462740424518654, "loss": 0.8477, "step": 103050 }, { "epoch": 1.809371653294475, "grad_norm": 0.050158882885466625, "learning_rate": 0.00014626413207104963, "loss": 0.8569, "step": 103060 }, { "epoch": 1.8095472181744765, "grad_norm": 0.05612182872990823, "learning_rate": 0.00014625422111672867, "loss": 0.8467, "step": 103070 }, { "epoch": 1.8097227830544778, "grad_norm": 0.06289540795904029, "learning_rate": 0.00014624430958902807, "loss": 0.8496, "step": 103080 }, { "epoch": 1.8098983479344792, "grad_norm": 0.06079955219843222, "learning_rate": 0.00014623439748807335, "loss": 0.8418, "step": 103090 }, { "epoch": 1.8100739128144805, "grad_norm": 0.06268025854893741, "learning_rate": 0.00014622448481399017, "loss": 0.8358, "step": 103100 }, { "epoch": 1.810249477694482, "grad_norm": 0.05474224260917414, "learning_rate": 0.00014621457156690408, "loss": 0.8399, "step": 103110 }, { "epoch": 1.8104250425744834, "grad_norm": 0.05288035806388404, "learning_rate": 0.00014620465774694072, "loss": 0.8456, "step": 103120 }, { "epoch": 1.810600607454485, "grad_norm": 0.054428753205340207, "learning_rate": 0.00014619474335422568, "loss": 0.842, "step": 103130 }, { "epoch": 1.8107761723344862, "grad_norm": 0.05635648795165411, "learning_rate": 0.00014618482838888458, "loss": 0.8525, "step": 103140 }, { "epoch": 1.8109517372144877, "grad_norm": 0.05623448223007993, "learning_rate": 0.00014617491285104305, "loss": 0.8585, "step": 103150 }, { "epoch": 1.811127302094489, "grad_norm": 0.055697183491576714, "learning_rate": 0.00014616499674082668, "loss": 0.8498, "step": 103160 }, { "epoch": 1.8113028669744904, "grad_norm": 0.06817063349173819, "learning_rate": 0.0001461550800583612, "loss": 0.8416, "step": 103170 }, { "epoch": 1.8114784318544919, "grad_norm": 0.07173965895085183, "learning_rate": 0.00014614516280377218, "loss": 0.8542, "step": 103180 }, { "epoch": 1.8116539967344933, "grad_norm": 0.060798855373964955, "learning_rate": 0.0001461352449771853, "loss": 0.8447, "step": 103190 }, { "epoch": 1.8118295616144946, "grad_norm": 0.06041484436336685, "learning_rate": 0.0001461253265787262, "loss": 0.8448, "step": 103200 }, { "epoch": 1.812005126494496, "grad_norm": 0.07348035062438414, "learning_rate": 0.00014611540760852055, "loss": 0.8485, "step": 103210 }, { "epoch": 1.8121806913744973, "grad_norm": 0.045164826863984514, "learning_rate": 0.00014610548806669407, "loss": 0.8547, "step": 103220 }, { "epoch": 1.8123562562544988, "grad_norm": 0.04596292703991063, "learning_rate": 0.00014609556795337243, "loss": 0.8402, "step": 103230 }, { "epoch": 1.8125318211345003, "grad_norm": 0.0638509902013268, "learning_rate": 0.00014608564726868126, "loss": 0.8413, "step": 103240 }, { "epoch": 1.8127073860145018, "grad_norm": 0.05772036069088936, "learning_rate": 0.00014607572601274633, "loss": 0.8502, "step": 103250 }, { "epoch": 1.812882950894503, "grad_norm": 0.06361603793744569, "learning_rate": 0.00014606580418569326, "loss": 0.8395, "step": 103260 }, { "epoch": 1.8130585157745045, "grad_norm": 0.07136299763125004, "learning_rate": 0.0001460558817876479, "loss": 0.8457, "step": 103270 }, { "epoch": 1.8132340806545058, "grad_norm": 0.052942732908552184, "learning_rate": 0.0001460459588187358, "loss": 0.8516, "step": 103280 }, { "epoch": 1.8134096455345072, "grad_norm": 0.07404416061992061, "learning_rate": 0.00014603603527908282, "loss": 0.8463, "step": 103290 }, { "epoch": 1.8135852104145087, "grad_norm": 0.0482229821194097, "learning_rate": 0.0001460261111688146, "loss": 0.8491, "step": 103300 }, { "epoch": 1.8137607752945102, "grad_norm": 0.048241560696166697, "learning_rate": 0.00014601618648805696, "loss": 0.857, "step": 103310 }, { "epoch": 1.8139363401745114, "grad_norm": 0.04609429241029781, "learning_rate": 0.0001460062612369356, "loss": 0.8467, "step": 103320 }, { "epoch": 1.814111905054513, "grad_norm": 0.08982020736651947, "learning_rate": 0.00014599633541557627, "loss": 0.8424, "step": 103330 }, { "epoch": 1.8142874699345142, "grad_norm": 0.07795169605573436, "learning_rate": 0.00014598640902410476, "loss": 0.8477, "step": 103340 }, { "epoch": 1.8144630348145157, "grad_norm": 0.0543265724755221, "learning_rate": 0.00014597648206264688, "loss": 0.8487, "step": 103350 }, { "epoch": 1.8146385996945171, "grad_norm": 0.06255385112448957, "learning_rate": 0.00014596655453132825, "loss": 0.8452, "step": 103360 }, { "epoch": 1.8148141645745186, "grad_norm": 0.06035038773929551, "learning_rate": 0.00014595662643027482, "loss": 0.8443, "step": 103370 }, { "epoch": 1.81498972945452, "grad_norm": 0.09771322139211523, "learning_rate": 0.00014594669775961228, "loss": 0.8437, "step": 103380 }, { "epoch": 1.8151652943345213, "grad_norm": 0.06570542889063487, "learning_rate": 0.00014593676851946648, "loss": 0.8438, "step": 103390 }, { "epoch": 1.8153408592145226, "grad_norm": 0.06373262775142734, "learning_rate": 0.00014592683870996321, "loss": 0.8515, "step": 103400 }, { "epoch": 1.815516424094524, "grad_norm": 0.09405414117872206, "learning_rate": 0.0001459169083312283, "loss": 0.8559, "step": 103410 }, { "epoch": 1.8156919889745256, "grad_norm": 0.0535352041598483, "learning_rate": 0.00014590697738338756, "loss": 0.8451, "step": 103420 }, { "epoch": 1.815867553854527, "grad_norm": 0.06907554168511587, "learning_rate": 0.00014589704586656679, "loss": 0.8488, "step": 103430 }, { "epoch": 1.8160431187345285, "grad_norm": 0.04574795761773199, "learning_rate": 0.00014588711378089183, "loss": 0.8448, "step": 103440 }, { "epoch": 1.8162186836145298, "grad_norm": 0.0783453909586768, "learning_rate": 0.00014587718112648855, "loss": 0.8467, "step": 103450 }, { "epoch": 1.816394248494531, "grad_norm": 0.050707831457041955, "learning_rate": 0.0001458672479034828, "loss": 0.8544, "step": 103460 }, { "epoch": 1.8165698133745325, "grad_norm": 0.07770511811979791, "learning_rate": 0.0001458573141120004, "loss": 0.8431, "step": 103470 }, { "epoch": 1.816745378254534, "grad_norm": 0.07697485645267162, "learning_rate": 0.00014584737975216724, "loss": 0.8449, "step": 103480 }, { "epoch": 1.8169209431345354, "grad_norm": 0.06805441738519118, "learning_rate": 0.00014583744482410922, "loss": 0.8475, "step": 103490 }, { "epoch": 1.817096508014537, "grad_norm": 0.061114649497454675, "learning_rate": 0.00014582750932795214, "loss": 0.839, "step": 103500 }, { "epoch": 1.8172720728945382, "grad_norm": 0.046417769343516796, "learning_rate": 0.0001458175732638219, "loss": 0.8454, "step": 103510 }, { "epoch": 1.8174476377745394, "grad_norm": 0.059762433345334445, "learning_rate": 0.00014580763663184446, "loss": 0.8445, "step": 103520 }, { "epoch": 1.817623202654541, "grad_norm": 0.0996982836794326, "learning_rate": 0.00014579769943214566, "loss": 0.8454, "step": 103530 }, { "epoch": 1.8177987675345424, "grad_norm": 0.06967210442291118, "learning_rate": 0.00014578776166485145, "loss": 0.8426, "step": 103540 }, { "epoch": 1.8179743324145439, "grad_norm": 0.062291572659844786, "learning_rate": 0.00014577782333008768, "loss": 0.8496, "step": 103550 }, { "epoch": 1.8181498972945453, "grad_norm": 0.05732654100637381, "learning_rate": 0.00014576788442798033, "loss": 0.8474, "step": 103560 }, { "epoch": 1.8183254621745466, "grad_norm": 0.046065874302085856, "learning_rate": 0.0001457579449586553, "loss": 0.8456, "step": 103570 }, { "epoch": 1.8185010270545479, "grad_norm": 0.056433662657315746, "learning_rate": 0.0001457480049222385, "loss": 0.8494, "step": 103580 }, { "epoch": 1.8186765919345493, "grad_norm": 0.05667465641432079, "learning_rate": 0.00014573806431885595, "loss": 0.8475, "step": 103590 }, { "epoch": 1.8188521568145508, "grad_norm": 0.048114032338695344, "learning_rate": 0.00014572812314863351, "loss": 0.85, "step": 103600 }, { "epoch": 1.8190277216945523, "grad_norm": 0.05609438972161714, "learning_rate": 0.00014571818141169722, "loss": 0.8465, "step": 103610 }, { "epoch": 1.8192032865745538, "grad_norm": 0.05325899060752258, "learning_rate": 0.00014570823910817297, "loss": 0.8536, "step": 103620 }, { "epoch": 1.819378851454555, "grad_norm": 0.1062252917676987, "learning_rate": 0.00014569829623818677, "loss": 0.8458, "step": 103630 }, { "epoch": 1.8195544163345563, "grad_norm": 0.044830391611341364, "learning_rate": 0.00014568835280186456, "loss": 0.8446, "step": 103640 }, { "epoch": 1.8197299812145578, "grad_norm": 0.08624428908472713, "learning_rate": 0.0001456784087993324, "loss": 0.8394, "step": 103650 }, { "epoch": 1.8199055460945592, "grad_norm": 0.06985088289492071, "learning_rate": 0.00014566846423071623, "loss": 0.8506, "step": 103660 }, { "epoch": 1.8200811109745607, "grad_norm": 0.04816300984936778, "learning_rate": 0.00014565851909614203, "loss": 0.8498, "step": 103670 }, { "epoch": 1.8202566758545622, "grad_norm": 0.05384698487434245, "learning_rate": 0.00014564857339573588, "loss": 0.8449, "step": 103680 }, { "epoch": 1.8204322407345634, "grad_norm": 0.0574895200156774, "learning_rate": 0.00014563862712962372, "loss": 0.8445, "step": 103690 }, { "epoch": 1.8206078056145647, "grad_norm": 0.06803156302934985, "learning_rate": 0.0001456286802979316, "loss": 0.8436, "step": 103700 }, { "epoch": 1.8207833704945662, "grad_norm": 0.06248569976459023, "learning_rate": 0.00014561873290078555, "loss": 0.8463, "step": 103710 }, { "epoch": 1.8209589353745677, "grad_norm": 0.07293376816098275, "learning_rate": 0.0001456087849383116, "loss": 0.8508, "step": 103720 }, { "epoch": 1.8211345002545691, "grad_norm": 0.04967432585243372, "learning_rate": 0.0001455988364106358, "loss": 0.8474, "step": 103730 }, { "epoch": 1.8213100651345706, "grad_norm": 0.05308784942116989, "learning_rate": 0.00014558888731788417, "loss": 0.8403, "step": 103740 }, { "epoch": 1.8214856300145719, "grad_norm": 0.04926429358967063, "learning_rate": 0.00014557893766018284, "loss": 0.8498, "step": 103750 }, { "epoch": 1.8216611948945733, "grad_norm": 0.05706302677269536, "learning_rate": 0.00014556898743765782, "loss": 0.8514, "step": 103760 }, { "epoch": 1.8218367597745746, "grad_norm": 0.07560068378554091, "learning_rate": 0.00014555903665043515, "loss": 0.8518, "step": 103770 }, { "epoch": 1.822012324654576, "grad_norm": 0.051747842584976604, "learning_rate": 0.00014554908529864098, "loss": 0.845, "step": 103780 }, { "epoch": 1.8221878895345776, "grad_norm": 0.06610801474062032, "learning_rate": 0.00014553913338240132, "loss": 0.8482, "step": 103790 }, { "epoch": 1.822363454414579, "grad_norm": 0.1007452586187125, "learning_rate": 0.00014552918090184234, "loss": 0.8486, "step": 103800 }, { "epoch": 1.8225390192945803, "grad_norm": 0.048416038547672006, "learning_rate": 0.00014551922785709012, "loss": 0.8465, "step": 103810 }, { "epoch": 1.8227145841745818, "grad_norm": 0.060798949682430846, "learning_rate": 0.00014550927424827073, "loss": 0.8498, "step": 103820 }, { "epoch": 1.822890149054583, "grad_norm": 0.06032790417445101, "learning_rate": 0.00014549932007551028, "loss": 0.8525, "step": 103830 }, { "epoch": 1.8230657139345845, "grad_norm": 0.060918439787335535, "learning_rate": 0.00014548936533893494, "loss": 0.8521, "step": 103840 }, { "epoch": 1.823241278814586, "grad_norm": 0.07031275462114858, "learning_rate": 0.0001454794100386708, "loss": 0.8557, "step": 103850 }, { "epoch": 1.8234168436945875, "grad_norm": 0.05412675602387381, "learning_rate": 0.00014546945417484404, "loss": 0.8485, "step": 103860 }, { "epoch": 1.8235924085745887, "grad_norm": 0.0629298751025017, "learning_rate": 0.00014545949774758077, "loss": 0.8544, "step": 103870 }, { "epoch": 1.8237679734545902, "grad_norm": 0.05737261387869819, "learning_rate": 0.00014544954075700712, "loss": 0.8511, "step": 103880 }, { "epoch": 1.8239435383345914, "grad_norm": 0.08761124953938763, "learning_rate": 0.00014543958320324929, "loss": 0.842, "step": 103890 }, { "epoch": 1.824119103214593, "grad_norm": 0.05443669725867993, "learning_rate": 0.0001454296250864334, "loss": 0.8528, "step": 103900 }, { "epoch": 1.8242946680945944, "grad_norm": 0.049672424946825354, "learning_rate": 0.00014541966640668565, "loss": 0.8457, "step": 103910 }, { "epoch": 1.8244702329745959, "grad_norm": 0.06543816745771697, "learning_rate": 0.00014540970716413223, "loss": 0.8404, "step": 103920 }, { "epoch": 1.8246457978545971, "grad_norm": 0.08685339768181459, "learning_rate": 0.0001453997473588993, "loss": 0.8482, "step": 103930 }, { "epoch": 1.8248213627345986, "grad_norm": 0.07795145854049687, "learning_rate": 0.00014538978699111307, "loss": 0.8493, "step": 103940 }, { "epoch": 1.8249969276145999, "grad_norm": 0.052798915744215524, "learning_rate": 0.00014537982606089973, "loss": 0.8591, "step": 103950 }, { "epoch": 1.8251724924946013, "grad_norm": 0.06390329998294687, "learning_rate": 0.00014536986456838545, "loss": 0.8518, "step": 103960 }, { "epoch": 1.8253480573746028, "grad_norm": 0.06305254683220209, "learning_rate": 0.00014535990251369652, "loss": 0.8447, "step": 103970 }, { "epoch": 1.8255236222546043, "grad_norm": 0.07192618989329394, "learning_rate": 0.00014534993989695914, "loss": 0.8442, "step": 103980 }, { "epoch": 1.8256991871346056, "grad_norm": 0.06620298961029075, "learning_rate": 0.0001453399767182995, "loss": 0.8454, "step": 103990 }, { "epoch": 1.825874752014607, "grad_norm": 0.0677387431315837, "learning_rate": 0.00014533001297784384, "loss": 0.8506, "step": 104000 }, { "epoch": 1.8260503168946083, "grad_norm": 0.052340678303456185, "learning_rate": 0.00014532004867571846, "loss": 0.8471, "step": 104010 }, { "epoch": 1.8262258817746098, "grad_norm": 0.05311824483018857, "learning_rate": 0.00014531008381204953, "loss": 0.846, "step": 104020 }, { "epoch": 1.8264014466546112, "grad_norm": 0.07318437678902039, "learning_rate": 0.00014530011838696338, "loss": 0.861, "step": 104030 }, { "epoch": 1.8265770115346127, "grad_norm": 0.06047287150884961, "learning_rate": 0.00014529015240058622, "loss": 0.8486, "step": 104040 }, { "epoch": 1.826752576414614, "grad_norm": 0.04954628516358834, "learning_rate": 0.00014528018585304434, "loss": 0.8466, "step": 104050 }, { "epoch": 1.8269281412946154, "grad_norm": 0.053673465785127515, "learning_rate": 0.000145270218744464, "loss": 0.8495, "step": 104060 }, { "epoch": 1.8271037061746167, "grad_norm": 0.06757328523726161, "learning_rate": 0.00014526025107497155, "loss": 0.8437, "step": 104070 }, { "epoch": 1.8272792710546182, "grad_norm": 0.06034596865910602, "learning_rate": 0.0001452502828446932, "loss": 0.8464, "step": 104080 }, { "epoch": 1.8274548359346197, "grad_norm": 0.0716904074468678, "learning_rate": 0.00014524031405375532, "loss": 0.8565, "step": 104090 }, { "epoch": 1.8276304008146211, "grad_norm": 0.07205661444400599, "learning_rate": 0.00014523034470228415, "loss": 0.8476, "step": 104100 }, { "epoch": 1.8278059656946226, "grad_norm": 0.07102809453948053, "learning_rate": 0.00014522037479040604, "loss": 0.8468, "step": 104110 }, { "epoch": 1.8279815305746239, "grad_norm": 0.059026014660531645, "learning_rate": 0.00014521040431824734, "loss": 0.8538, "step": 104120 }, { "epoch": 1.8281570954546251, "grad_norm": 0.056868598603061614, "learning_rate": 0.0001452004332859343, "loss": 0.8495, "step": 104130 }, { "epoch": 1.8283326603346266, "grad_norm": 0.06350616035524086, "learning_rate": 0.0001451904616935933, "loss": 0.8401, "step": 104140 }, { "epoch": 1.828508225214628, "grad_norm": 0.050935435255591306, "learning_rate": 0.00014518048954135072, "loss": 0.8477, "step": 104150 }, { "epoch": 1.8286837900946296, "grad_norm": 0.0846137130155374, "learning_rate": 0.00014517051682933288, "loss": 0.8416, "step": 104160 }, { "epoch": 1.828859354974631, "grad_norm": 0.05492541274331719, "learning_rate": 0.00014516054355766608, "loss": 0.8485, "step": 104170 }, { "epoch": 1.8290349198546323, "grad_norm": 0.057451721713236314, "learning_rate": 0.00014515056972647675, "loss": 0.849, "step": 104180 }, { "epoch": 1.8292104847346335, "grad_norm": 0.054695019296509134, "learning_rate": 0.00014514059533589124, "loss": 0.8458, "step": 104190 }, { "epoch": 1.829386049614635, "grad_norm": 0.06226453329089998, "learning_rate": 0.00014513062038603593, "loss": 0.8388, "step": 104200 }, { "epoch": 1.8295616144946365, "grad_norm": 0.05851732027763638, "learning_rate": 0.00014512064487703715, "loss": 0.853, "step": 104210 }, { "epoch": 1.829737179374638, "grad_norm": 0.0557548377784978, "learning_rate": 0.00014511066880902143, "loss": 0.8446, "step": 104220 }, { "epoch": 1.8299127442546395, "grad_norm": 0.0625415624747608, "learning_rate": 0.00014510069218211504, "loss": 0.8486, "step": 104230 }, { "epoch": 1.8300883091346407, "grad_norm": 0.0608376327166691, "learning_rate": 0.0001450907149964444, "loss": 0.8458, "step": 104240 }, { "epoch": 1.830263874014642, "grad_norm": 0.053990028882840616, "learning_rate": 0.00014508073725213598, "loss": 0.8491, "step": 104250 }, { "epoch": 1.8304394388946434, "grad_norm": 0.07280071513517536, "learning_rate": 0.0001450707589493162, "loss": 0.8605, "step": 104260 }, { "epoch": 1.830615003774645, "grad_norm": 0.0527541627035653, "learning_rate": 0.00014506078008811143, "loss": 0.8462, "step": 104270 }, { "epoch": 1.8307905686546464, "grad_norm": 0.046081626570998485, "learning_rate": 0.0001450508006686481, "loss": 0.8481, "step": 104280 }, { "epoch": 1.8309661335346479, "grad_norm": 0.0521331325165148, "learning_rate": 0.00014504082069105275, "loss": 0.8423, "step": 104290 }, { "epoch": 1.8311416984146491, "grad_norm": 0.059447253967566956, "learning_rate": 0.00014503084015545173, "loss": 0.8521, "step": 104300 }, { "epoch": 1.8313172632946504, "grad_norm": 0.04759922706850495, "learning_rate": 0.0001450208590619715, "loss": 0.8509, "step": 104310 }, { "epoch": 1.8314928281746519, "grad_norm": 0.06496509959826567, "learning_rate": 0.0001450108774107386, "loss": 0.8393, "step": 104320 }, { "epoch": 1.8316683930546533, "grad_norm": 0.06927111942250183, "learning_rate": 0.0001450008952018794, "loss": 0.8596, "step": 104330 }, { "epoch": 1.8318439579346548, "grad_norm": 0.040314147060411615, "learning_rate": 0.00014499091243552046, "loss": 0.8522, "step": 104340 }, { "epoch": 1.8320195228146563, "grad_norm": 0.06897843693927244, "learning_rate": 0.0001449809291117882, "loss": 0.8428, "step": 104350 }, { "epoch": 1.8321950876946576, "grad_norm": 0.052562630634805416, "learning_rate": 0.00014497094523080917, "loss": 0.8488, "step": 104360 }, { "epoch": 1.8323706525746588, "grad_norm": 0.05628542132656844, "learning_rate": 0.00014496096079270982, "loss": 0.8429, "step": 104370 }, { "epoch": 1.8325462174546603, "grad_norm": 0.04074547051931621, "learning_rate": 0.00014495097579761667, "loss": 0.8454, "step": 104380 }, { "epoch": 1.8327217823346618, "grad_norm": 0.06279640709000898, "learning_rate": 0.00014494099024565623, "loss": 0.843, "step": 104390 }, { "epoch": 1.8328973472146632, "grad_norm": 0.05854862808552525, "learning_rate": 0.00014493100413695502, "loss": 0.8393, "step": 104400 }, { "epoch": 1.8330729120946647, "grad_norm": 0.06004371835786594, "learning_rate": 0.00014492101747163956, "loss": 0.8517, "step": 104410 }, { "epoch": 1.833248476974666, "grad_norm": 0.05485700263495913, "learning_rate": 0.00014491103024983644, "loss": 0.8505, "step": 104420 }, { "epoch": 1.8334240418546672, "grad_norm": 0.07989898774971158, "learning_rate": 0.00014490104247167214, "loss": 0.8422, "step": 104430 }, { "epoch": 1.8335996067346687, "grad_norm": 0.0698563321365467, "learning_rate": 0.0001448910541372732, "loss": 0.8428, "step": 104440 }, { "epoch": 1.8337751716146702, "grad_norm": 0.043560890355799166, "learning_rate": 0.00014488106524676616, "loss": 0.846, "step": 104450 }, { "epoch": 1.8339507364946717, "grad_norm": 0.06311623539340162, "learning_rate": 0.00014487107580027765, "loss": 0.8441, "step": 104460 }, { "epoch": 1.8341263013746731, "grad_norm": 0.08966435225073092, "learning_rate": 0.0001448610857979342, "loss": 0.8419, "step": 104470 }, { "epoch": 1.8343018662546744, "grad_norm": 0.05564065564271927, "learning_rate": 0.00014485109523986237, "loss": 0.8449, "step": 104480 }, { "epoch": 1.8344774311346759, "grad_norm": 0.04541515744824186, "learning_rate": 0.00014484110412618878, "loss": 0.8398, "step": 104490 }, { "epoch": 1.8346529960146771, "grad_norm": 0.08655378863361059, "learning_rate": 0.00014483111245704, "loss": 0.8408, "step": 104500 }, { "epoch": 1.8348285608946786, "grad_norm": 0.06549743382682997, "learning_rate": 0.00014482112023254262, "loss": 0.8453, "step": 104510 }, { "epoch": 1.83500412577468, "grad_norm": 0.0455265368866188, "learning_rate": 0.00014481112745282323, "loss": 0.8534, "step": 104520 }, { "epoch": 1.8351796906546816, "grad_norm": 0.06164994946019855, "learning_rate": 0.00014480113411800849, "loss": 0.8476, "step": 104530 }, { "epoch": 1.8353552555346828, "grad_norm": 0.05145123138243053, "learning_rate": 0.00014479114022822496, "loss": 0.8447, "step": 104540 }, { "epoch": 1.8355308204146843, "grad_norm": 0.07988407001286241, "learning_rate": 0.0001447811457835993, "loss": 0.8497, "step": 104550 }, { "epoch": 1.8357063852946855, "grad_norm": 0.050550514712438946, "learning_rate": 0.00014477115078425813, "loss": 0.8466, "step": 104560 }, { "epoch": 1.835881950174687, "grad_norm": 0.04905350095380034, "learning_rate": 0.00014476115523032808, "loss": 0.8464, "step": 104570 }, { "epoch": 1.8360575150546885, "grad_norm": 0.06477172367640323, "learning_rate": 0.00014475115912193583, "loss": 0.8448, "step": 104580 }, { "epoch": 1.83623307993469, "grad_norm": 0.05561516936538364, "learning_rate": 0.00014474116245920797, "loss": 0.8548, "step": 104590 }, { "epoch": 1.8364086448146912, "grad_norm": 0.05257069576767229, "learning_rate": 0.00014473116524227125, "loss": 0.8488, "step": 104600 }, { "epoch": 1.8365842096946927, "grad_norm": 0.0743718011767674, "learning_rate": 0.00014472116747125228, "loss": 0.8477, "step": 104610 }, { "epoch": 1.836759774574694, "grad_norm": 0.06879615834479477, "learning_rate": 0.0001447111691462777, "loss": 0.848, "step": 104620 }, { "epoch": 1.8369353394546954, "grad_norm": 0.08145576560373398, "learning_rate": 0.00014470117026747426, "loss": 0.8393, "step": 104630 }, { "epoch": 1.837110904334697, "grad_norm": 0.06896568366437907, "learning_rate": 0.0001446911708349686, "loss": 0.8502, "step": 104640 }, { "epoch": 1.8372864692146984, "grad_norm": 0.06373185742560591, "learning_rate": 0.00014468117084888747, "loss": 0.8415, "step": 104650 }, { "epoch": 1.8374620340946997, "grad_norm": 0.04782265542222571, "learning_rate": 0.0001446711703093575, "loss": 0.8463, "step": 104660 }, { "epoch": 1.8376375989747011, "grad_norm": 0.04434110808190677, "learning_rate": 0.00014466116921650542, "loss": 0.8529, "step": 104670 }, { "epoch": 1.8378131638547024, "grad_norm": 0.06840731925541023, "learning_rate": 0.00014465116757045798, "loss": 0.8424, "step": 104680 }, { "epoch": 1.8379887287347039, "grad_norm": 0.05480411771743443, "learning_rate": 0.00014464116537134188, "loss": 0.8513, "step": 104690 }, { "epoch": 1.8381642936147053, "grad_norm": 0.05461782728309248, "learning_rate": 0.00014463116261928383, "loss": 0.8448, "step": 104700 }, { "epoch": 1.8383398584947068, "grad_norm": 0.0472545170022371, "learning_rate": 0.00014462115931441062, "loss": 0.8484, "step": 104710 }, { "epoch": 1.838515423374708, "grad_norm": 0.07138042002633492, "learning_rate": 0.00014461115545684895, "loss": 0.8467, "step": 104720 }, { "epoch": 1.8386909882547096, "grad_norm": 0.05438005598449055, "learning_rate": 0.0001446011510467256, "loss": 0.8453, "step": 104730 }, { "epoch": 1.8388665531347108, "grad_norm": 0.05781638094644989, "learning_rate": 0.00014459114608416724, "loss": 0.8418, "step": 104740 }, { "epoch": 1.8390421180147123, "grad_norm": 0.0630991080054205, "learning_rate": 0.0001445811405693008, "loss": 0.853, "step": 104750 }, { "epoch": 1.8392176828947138, "grad_norm": 0.06936775904450057, "learning_rate": 0.0001445711345022529, "loss": 0.8471, "step": 104760 }, { "epoch": 1.8393932477747152, "grad_norm": 0.05002906764594328, "learning_rate": 0.00014456112788315039, "loss": 0.8478, "step": 104770 }, { "epoch": 1.8395688126547165, "grad_norm": 0.060422605626508044, "learning_rate": 0.00014455112071212002, "loss": 0.8519, "step": 104780 }, { "epoch": 1.839744377534718, "grad_norm": 0.0544593676342436, "learning_rate": 0.00014454111298928862, "loss": 0.8442, "step": 104790 }, { "epoch": 1.8399199424147192, "grad_norm": 0.06879640382305899, "learning_rate": 0.000144531104714783, "loss": 0.849, "step": 104800 }, { "epoch": 1.8400955072947207, "grad_norm": 0.05698487199157572, "learning_rate": 0.0001445210958887299, "loss": 0.8478, "step": 104810 }, { "epoch": 1.8402710721747222, "grad_norm": 0.04301142574596063, "learning_rate": 0.00014451108651125617, "loss": 0.851, "step": 104820 }, { "epoch": 1.8404466370547237, "grad_norm": 0.06264058971448595, "learning_rate": 0.00014450107658248867, "loss": 0.8462, "step": 104830 }, { "epoch": 1.8406222019347251, "grad_norm": 0.059121319490239424, "learning_rate": 0.00014449106610255419, "loss": 0.8417, "step": 104840 }, { "epoch": 1.8407977668147264, "grad_norm": 0.05161203675654514, "learning_rate": 0.00014448105507157955, "loss": 0.8437, "step": 104850 }, { "epoch": 1.8409733316947277, "grad_norm": 0.07734212081986895, "learning_rate": 0.0001444710434896916, "loss": 0.8473, "step": 104860 }, { "epoch": 1.8411488965747291, "grad_norm": 0.052306153187998874, "learning_rate": 0.00014446103135701724, "loss": 0.8511, "step": 104870 }, { "epoch": 1.8413244614547306, "grad_norm": 0.05556103771278905, "learning_rate": 0.00014445101867368324, "loss": 0.8452, "step": 104880 }, { "epoch": 1.841500026334732, "grad_norm": 0.06644153510186514, "learning_rate": 0.00014444100543981652, "loss": 0.8486, "step": 104890 }, { "epoch": 1.8416755912147336, "grad_norm": 0.06229308302956273, "learning_rate": 0.00014443099165554395, "loss": 0.8441, "step": 104900 }, { "epoch": 1.8418511560947348, "grad_norm": 0.0538453567973435, "learning_rate": 0.00014442097732099237, "loss": 0.8469, "step": 104910 }, { "epoch": 1.842026720974736, "grad_norm": 0.04979485007305208, "learning_rate": 0.0001444109624362887, "loss": 0.8483, "step": 104920 }, { "epoch": 1.8422022858547376, "grad_norm": 0.05609681774493647, "learning_rate": 0.0001444009470015598, "loss": 0.8416, "step": 104930 }, { "epoch": 1.842377850734739, "grad_norm": 0.0677096049023443, "learning_rate": 0.0001443909310169326, "loss": 0.8473, "step": 104940 }, { "epoch": 1.8425534156147405, "grad_norm": 0.050771945494740324, "learning_rate": 0.00014438091448253402, "loss": 0.8487, "step": 104950 }, { "epoch": 1.842728980494742, "grad_norm": 0.08077315440810526, "learning_rate": 0.0001443708973984909, "loss": 0.8449, "step": 104960 }, { "epoch": 1.8429045453747432, "grad_norm": 0.04788276840885666, "learning_rate": 0.00014436087976493018, "loss": 0.8551, "step": 104970 }, { "epoch": 1.8430801102547445, "grad_norm": 0.04611985249668424, "learning_rate": 0.00014435086158197882, "loss": 0.8478, "step": 104980 }, { "epoch": 1.843255675134746, "grad_norm": 0.05695919540248361, "learning_rate": 0.00014434084284976374, "loss": 0.8487, "step": 104990 }, { "epoch": 1.8434312400147475, "grad_norm": 0.06440783717999957, "learning_rate": 0.00014433082356841187, "loss": 0.8429, "step": 105000 }, { "epoch": 1.843606804894749, "grad_norm": 0.05995257118819511, "learning_rate": 0.00014432080373805018, "loss": 0.8522, "step": 105010 }, { "epoch": 1.8437823697747504, "grad_norm": 0.052830448904488826, "learning_rate": 0.0001443107833588056, "loss": 0.8489, "step": 105020 }, { "epoch": 1.8439579346547517, "grad_norm": 0.05382948905359375, "learning_rate": 0.00014430076243080505, "loss": 0.8446, "step": 105030 }, { "epoch": 1.844133499534753, "grad_norm": 0.06459877621160837, "learning_rate": 0.00014429074095417558, "loss": 0.8498, "step": 105040 }, { "epoch": 1.8443090644147544, "grad_norm": 0.06390736210079573, "learning_rate": 0.00014428071892904414, "loss": 0.8488, "step": 105050 }, { "epoch": 1.8444846292947559, "grad_norm": 0.050275395075164236, "learning_rate": 0.00014427069635553769, "loss": 0.8523, "step": 105060 }, { "epoch": 1.8446601941747574, "grad_norm": 0.06282258932396985, "learning_rate": 0.0001442606732337832, "loss": 0.8477, "step": 105070 }, { "epoch": 1.8448357590547588, "grad_norm": 0.06978338871855795, "learning_rate": 0.0001442506495639077, "loss": 0.8521, "step": 105080 }, { "epoch": 1.84501132393476, "grad_norm": 0.06574508133677207, "learning_rate": 0.0001442406253460382, "loss": 0.8409, "step": 105090 }, { "epoch": 1.8451868888147613, "grad_norm": 0.042234097029942186, "learning_rate": 0.00014423060058030166, "loss": 0.846, "step": 105100 }, { "epoch": 1.8453624536947628, "grad_norm": 0.05805944144678811, "learning_rate": 0.00014422057526682514, "loss": 0.8523, "step": 105110 }, { "epoch": 1.8455380185747643, "grad_norm": 0.09689149220574289, "learning_rate": 0.00014421054940573567, "loss": 0.8479, "step": 105120 }, { "epoch": 1.8457135834547658, "grad_norm": 0.04878881701861149, "learning_rate": 0.0001442005229971602, "loss": 0.8481, "step": 105130 }, { "epoch": 1.8458891483347672, "grad_norm": 0.050600700989645045, "learning_rate": 0.00014419049604122588, "loss": 0.8496, "step": 105140 }, { "epoch": 1.8460647132147685, "grad_norm": 0.05528096176878486, "learning_rate": 0.00014418046853805966, "loss": 0.8453, "step": 105150 }, { "epoch": 1.8462402780947698, "grad_norm": 0.05634514658153842, "learning_rate": 0.00014417044048778866, "loss": 0.8452, "step": 105160 }, { "epoch": 1.8464158429747712, "grad_norm": 0.05574479742509345, "learning_rate": 0.00014416041189053988, "loss": 0.8454, "step": 105170 }, { "epoch": 1.8465914078547727, "grad_norm": 0.06019476224043916, "learning_rate": 0.00014415038274644042, "loss": 0.8436, "step": 105180 }, { "epoch": 1.8467669727347742, "grad_norm": 0.048108837661600144, "learning_rate": 0.00014414035305561733, "loss": 0.8505, "step": 105190 }, { "epoch": 1.8469425376147757, "grad_norm": 0.06348523513364851, "learning_rate": 0.00014413032281819773, "loss": 0.8435, "step": 105200 }, { "epoch": 1.847118102494777, "grad_norm": 0.07684988397168108, "learning_rate": 0.00014412029203430865, "loss": 0.8505, "step": 105210 }, { "epoch": 1.8472936673747784, "grad_norm": 0.06609883496574573, "learning_rate": 0.0001441102607040772, "loss": 0.8499, "step": 105220 }, { "epoch": 1.8474692322547797, "grad_norm": 0.06879414803741012, "learning_rate": 0.0001441002288276305, "loss": 0.8529, "step": 105230 }, { "epoch": 1.8476447971347811, "grad_norm": 0.04570523598852523, "learning_rate": 0.00014409019640509562, "loss": 0.8521, "step": 105240 }, { "epoch": 1.8478203620147826, "grad_norm": 0.04965360089226083, "learning_rate": 0.00014408016343659972, "loss": 0.8494, "step": 105250 }, { "epoch": 1.847995926894784, "grad_norm": 0.06003013477885608, "learning_rate": 0.0001440701299222699, "loss": 0.8414, "step": 105260 }, { "epoch": 1.8481714917747853, "grad_norm": 0.056641403449427856, "learning_rate": 0.00014406009586223324, "loss": 0.8439, "step": 105270 }, { "epoch": 1.8483470566547868, "grad_norm": 0.07828698446618373, "learning_rate": 0.00014405006125661692, "loss": 0.8372, "step": 105280 }, { "epoch": 1.848522621534788, "grad_norm": 0.05972810326846312, "learning_rate": 0.00014404002610554813, "loss": 0.8508, "step": 105290 }, { "epoch": 1.8486981864147896, "grad_norm": 0.069053087963169, "learning_rate": 0.00014402999040915389, "loss": 0.8461, "step": 105300 }, { "epoch": 1.848873751294791, "grad_norm": 0.07462298935890625, "learning_rate": 0.00014401995416756145, "loss": 0.8558, "step": 105310 }, { "epoch": 1.8490493161747925, "grad_norm": 0.056263524130788445, "learning_rate": 0.00014400991738089795, "loss": 0.8407, "step": 105320 }, { "epoch": 1.8492248810547938, "grad_norm": 0.05431571271889149, "learning_rate": 0.00014399988004929056, "loss": 0.8395, "step": 105330 }, { "epoch": 1.8494004459347952, "grad_norm": 0.051791898240005387, "learning_rate": 0.00014398984217286643, "loss": 0.8431, "step": 105340 }, { "epoch": 1.8495760108147965, "grad_norm": 0.05308405441913459, "learning_rate": 0.0001439798037517528, "loss": 0.8467, "step": 105350 }, { "epoch": 1.849751575694798, "grad_norm": 0.07238559933530972, "learning_rate": 0.00014396976478607678, "loss": 0.8503, "step": 105360 }, { "epoch": 1.8499271405747995, "grad_norm": 0.08060978660065447, "learning_rate": 0.00014395972527596562, "loss": 0.8444, "step": 105370 }, { "epoch": 1.850102705454801, "grad_norm": 0.06380114733084927, "learning_rate": 0.0001439496852215465, "loss": 0.846, "step": 105380 }, { "epoch": 1.8502782703348022, "grad_norm": 0.05562760747515547, "learning_rate": 0.00014393964462294665, "loss": 0.8534, "step": 105390 }, { "epoch": 1.8504538352148037, "grad_norm": 0.0485590948949104, "learning_rate": 0.00014392960348029324, "loss": 0.8454, "step": 105400 }, { "epoch": 1.850629400094805, "grad_norm": 0.04578927672877621, "learning_rate": 0.00014391956179371355, "loss": 0.8509, "step": 105410 }, { "epoch": 1.8508049649748064, "grad_norm": 0.047992567544259276, "learning_rate": 0.0001439095195633348, "loss": 0.8512, "step": 105420 }, { "epoch": 1.8509805298548079, "grad_norm": 0.0612133038027644, "learning_rate": 0.0001438994767892842, "loss": 0.8421, "step": 105430 }, { "epoch": 1.8511560947348094, "grad_norm": 0.04918445908685571, "learning_rate": 0.000143889433471689, "loss": 0.8475, "step": 105440 }, { "epoch": 1.8513316596148106, "grad_norm": 0.08897320873537727, "learning_rate": 0.00014387938961067648, "loss": 0.8421, "step": 105450 }, { "epoch": 1.851507224494812, "grad_norm": 0.057743006353822394, "learning_rate": 0.00014386934520637385, "loss": 0.8511, "step": 105460 }, { "epoch": 1.8516827893748133, "grad_norm": 0.059395532796334666, "learning_rate": 0.0001438593002589084, "loss": 0.8515, "step": 105470 }, { "epoch": 1.8518583542548148, "grad_norm": 0.06841509792693741, "learning_rate": 0.00014384925476840745, "loss": 0.8475, "step": 105480 }, { "epoch": 1.8520339191348163, "grad_norm": 0.05009044907997425, "learning_rate": 0.0001438392087349982, "loss": 0.8406, "step": 105490 }, { "epoch": 1.8522094840148178, "grad_norm": 0.049482034222281625, "learning_rate": 0.00014382916215880794, "loss": 0.844, "step": 105500 }, { "epoch": 1.852385048894819, "grad_norm": 0.06792225582727836, "learning_rate": 0.00014381911503996405, "loss": 0.8451, "step": 105510 }, { "epoch": 1.8525606137748205, "grad_norm": 0.06994152798722306, "learning_rate": 0.00014380906737859368, "loss": 0.8463, "step": 105520 }, { "epoch": 1.8527361786548218, "grad_norm": 0.06055368410006964, "learning_rate": 0.00014379901917482428, "loss": 0.8419, "step": 105530 }, { "epoch": 1.8529117435348232, "grad_norm": 0.05733804173170315, "learning_rate": 0.00014378897042878307, "loss": 0.8458, "step": 105540 }, { "epoch": 1.8530873084148247, "grad_norm": 0.058000389828197545, "learning_rate": 0.00014377892114059742, "loss": 0.8478, "step": 105550 }, { "epoch": 1.8532628732948262, "grad_norm": 0.05025373137179316, "learning_rate": 0.00014376887131039467, "loss": 0.8509, "step": 105560 }, { "epoch": 1.8534384381748277, "grad_norm": 0.06326491809135787, "learning_rate": 0.00014375882093830208, "loss": 0.8455, "step": 105570 }, { "epoch": 1.853614003054829, "grad_norm": 0.04326927491664561, "learning_rate": 0.00014374877002444708, "loss": 0.8499, "step": 105580 }, { "epoch": 1.8537895679348302, "grad_norm": 0.06149130752774975, "learning_rate": 0.00014373871856895694, "loss": 0.8561, "step": 105590 }, { "epoch": 1.8539651328148317, "grad_norm": 0.04931187010374151, "learning_rate": 0.00014372866657195904, "loss": 0.842, "step": 105600 }, { "epoch": 1.8541406976948331, "grad_norm": 0.0529182904041342, "learning_rate": 0.00014371861403358074, "loss": 0.853, "step": 105610 }, { "epoch": 1.8543162625748346, "grad_norm": 0.06553724259871956, "learning_rate": 0.00014370856095394946, "loss": 0.8408, "step": 105620 }, { "epoch": 1.854491827454836, "grad_norm": 0.07176489522510181, "learning_rate": 0.00014369850733319248, "loss": 0.8476, "step": 105630 }, { "epoch": 1.8546673923348374, "grad_norm": 0.06385213511496166, "learning_rate": 0.00014368845317143725, "loss": 0.8442, "step": 105640 }, { "epoch": 1.8548429572148386, "grad_norm": 0.07032120088265846, "learning_rate": 0.00014367839846881112, "loss": 0.8544, "step": 105650 }, { "epoch": 1.85501852209484, "grad_norm": 0.06458903577006424, "learning_rate": 0.0001436683432254415, "loss": 0.8514, "step": 105660 }, { "epoch": 1.8551940869748416, "grad_norm": 0.047885956073720044, "learning_rate": 0.00014365828744145582, "loss": 0.8364, "step": 105670 }, { "epoch": 1.855369651854843, "grad_norm": 0.056563015813032226, "learning_rate": 0.00014364823111698147, "loss": 0.8459, "step": 105680 }, { "epoch": 1.8555452167348445, "grad_norm": 0.07881521268803729, "learning_rate": 0.00014363817425214583, "loss": 0.8472, "step": 105690 }, { "epoch": 1.8557207816148458, "grad_norm": 0.0504751637851391, "learning_rate": 0.00014362811684707637, "loss": 0.8493, "step": 105700 }, { "epoch": 1.855896346494847, "grad_norm": 0.0628142103849103, "learning_rate": 0.00014361805890190045, "loss": 0.8416, "step": 105710 }, { "epoch": 1.8560719113748485, "grad_norm": 0.04520123292801607, "learning_rate": 0.00014360800041674562, "loss": 0.8418, "step": 105720 }, { "epoch": 1.85624747625485, "grad_norm": 0.06115456241514542, "learning_rate": 0.00014359794139173922, "loss": 0.8436, "step": 105730 }, { "epoch": 1.8564230411348515, "grad_norm": 0.05502703561893415, "learning_rate": 0.00014358788182700877, "loss": 0.8478, "step": 105740 }, { "epoch": 1.856598606014853, "grad_norm": 0.0668340131696173, "learning_rate": 0.0001435778217226817, "loss": 0.8493, "step": 105750 }, { "epoch": 1.8567741708948542, "grad_norm": 0.05952188609868521, "learning_rate": 0.00014356776107888542, "loss": 0.8508, "step": 105760 }, { "epoch": 1.8569497357748554, "grad_norm": 0.07717501901512717, "learning_rate": 0.00014355769989574747, "loss": 0.8404, "step": 105770 }, { "epoch": 1.857125300654857, "grad_norm": 0.08427664902579136, "learning_rate": 0.00014354763817339529, "loss": 0.8469, "step": 105780 }, { "epoch": 1.8573008655348584, "grad_norm": 0.05908911822536438, "learning_rate": 0.00014353757591195642, "loss": 0.8503, "step": 105790 }, { "epoch": 1.8574764304148599, "grad_norm": 0.06498241932218926, "learning_rate": 0.0001435275131115583, "loss": 0.8498, "step": 105800 }, { "epoch": 1.8576519952948614, "grad_norm": 0.05198884428792769, "learning_rate": 0.00014351744977232842, "loss": 0.8418, "step": 105810 }, { "epoch": 1.8578275601748626, "grad_norm": 0.05188555309641177, "learning_rate": 0.0001435073858943943, "loss": 0.8492, "step": 105820 }, { "epoch": 1.8580031250548639, "grad_norm": 0.0709295247386001, "learning_rate": 0.00014349732147788348, "loss": 0.8488, "step": 105830 }, { "epoch": 1.8581786899348653, "grad_norm": 0.06260994986469656, "learning_rate": 0.00014348725652292346, "loss": 0.8428, "step": 105840 }, { "epoch": 1.8583542548148668, "grad_norm": 0.05559356657050467, "learning_rate": 0.00014347719102964174, "loss": 0.8482, "step": 105850 }, { "epoch": 1.8585298196948683, "grad_norm": 0.05518184823135107, "learning_rate": 0.00014346712499816586, "loss": 0.8489, "step": 105860 }, { "epoch": 1.8587053845748698, "grad_norm": 0.05041999233228258, "learning_rate": 0.0001434570584286234, "loss": 0.8461, "step": 105870 }, { "epoch": 1.858880949454871, "grad_norm": 0.04860185562823752, "learning_rate": 0.0001434469913211418, "loss": 0.8573, "step": 105880 }, { "epoch": 1.8590565143348723, "grad_norm": 0.045906976520861995, "learning_rate": 0.00014343692367584877, "loss": 0.838, "step": 105890 }, { "epoch": 1.8592320792148738, "grad_norm": 0.04867132825814745, "learning_rate": 0.00014342685549287174, "loss": 0.8462, "step": 105900 }, { "epoch": 1.8594076440948752, "grad_norm": 0.05346777791073834, "learning_rate": 0.00014341678677233833, "loss": 0.845, "step": 105910 }, { "epoch": 1.8595832089748767, "grad_norm": 0.10865122499614244, "learning_rate": 0.00014340671751437612, "loss": 0.8534, "step": 105920 }, { "epoch": 1.8597587738548782, "grad_norm": 0.04812895058936261, "learning_rate": 0.00014339664771911262, "loss": 0.8435, "step": 105930 }, { "epoch": 1.8599343387348795, "grad_norm": 0.06073722792840988, "learning_rate": 0.0001433865773866755, "loss": 0.8442, "step": 105940 }, { "epoch": 1.860109903614881, "grad_norm": 0.048152760263412656, "learning_rate": 0.00014337650651719233, "loss": 0.8455, "step": 105950 }, { "epoch": 1.8602854684948822, "grad_norm": 0.08461173132057581, "learning_rate": 0.00014336643511079068, "loss": 0.8483, "step": 105960 }, { "epoch": 1.8604610333748837, "grad_norm": 0.055151699199706194, "learning_rate": 0.0001433563631675982, "loss": 0.8494, "step": 105970 }, { "epoch": 1.8606365982548851, "grad_norm": 0.06852641796368732, "learning_rate": 0.00014334629068774246, "loss": 0.8464, "step": 105980 }, { "epoch": 1.8608121631348866, "grad_norm": 0.05039410347012162, "learning_rate": 0.00014333621767135112, "loss": 0.8429, "step": 105990 }, { "epoch": 1.8609877280148879, "grad_norm": 0.05270753421936715, "learning_rate": 0.00014332614411855177, "loss": 0.8505, "step": 106000 }, { "epoch": 1.8611632928948894, "grad_norm": 0.05007712127983891, "learning_rate": 0.0001433160700294721, "loss": 0.8451, "step": 106010 }, { "epoch": 1.8613388577748906, "grad_norm": 0.05474355403358141, "learning_rate": 0.00014330599540423965, "loss": 0.8432, "step": 106020 }, { "epoch": 1.861514422654892, "grad_norm": 0.06424179647035486, "learning_rate": 0.00014329592024298215, "loss": 0.841, "step": 106030 }, { "epoch": 1.8616899875348936, "grad_norm": 0.054897714499368845, "learning_rate": 0.00014328584454582728, "loss": 0.8443, "step": 106040 }, { "epoch": 1.861865552414895, "grad_norm": 0.08094435461837184, "learning_rate": 0.0001432757683129026, "loss": 0.85, "step": 106050 }, { "epoch": 1.8620411172948963, "grad_norm": 0.0489076394863472, "learning_rate": 0.00014326569154433588, "loss": 0.852, "step": 106060 }, { "epoch": 1.8622166821748978, "grad_norm": 0.055851275170473204, "learning_rate": 0.00014325561424025473, "loss": 0.8461, "step": 106070 }, { "epoch": 1.862392247054899, "grad_norm": 0.055846653203605476, "learning_rate": 0.00014324553640078684, "loss": 0.8413, "step": 106080 }, { "epoch": 1.8625678119349005, "grad_norm": 0.05795555425517053, "learning_rate": 0.00014323545802605994, "loss": 0.8422, "step": 106090 }, { "epoch": 1.862743376814902, "grad_norm": 0.053848750324656774, "learning_rate": 0.00014322537911620165, "loss": 0.8419, "step": 106100 }, { "epoch": 1.8629189416949035, "grad_norm": 0.06082115786942185, "learning_rate": 0.00014321529967133972, "loss": 0.8414, "step": 106110 }, { "epoch": 1.8630945065749047, "grad_norm": 0.04536819587977698, "learning_rate": 0.00014320521969160186, "loss": 0.8511, "step": 106120 }, { "epoch": 1.8632700714549062, "grad_norm": 0.05804081068897011, "learning_rate": 0.00014319513917711578, "loss": 0.8475, "step": 106130 }, { "epoch": 1.8634456363349075, "grad_norm": 0.059863783339413786, "learning_rate": 0.0001431850581280092, "loss": 0.8438, "step": 106140 }, { "epoch": 1.863621201214909, "grad_norm": 0.05850482433688861, "learning_rate": 0.00014317497654440985, "loss": 0.8435, "step": 106150 }, { "epoch": 1.8637967660949104, "grad_norm": 0.08743121459884803, "learning_rate": 0.00014316489442644547, "loss": 0.8487, "step": 106160 }, { "epoch": 1.8639723309749119, "grad_norm": 0.05456994854308207, "learning_rate": 0.00014315481177424376, "loss": 0.8521, "step": 106170 }, { "epoch": 1.8641478958549131, "grad_norm": 0.0662911114432518, "learning_rate": 0.00014314472858793255, "loss": 0.8467, "step": 106180 }, { "epoch": 1.8643234607349146, "grad_norm": 0.06001968944784056, "learning_rate": 0.00014313464486763957, "loss": 0.8432, "step": 106190 }, { "epoch": 1.8644990256149159, "grad_norm": 0.05792799944250671, "learning_rate": 0.0001431245606134925, "loss": 0.8509, "step": 106200 }, { "epoch": 1.8646745904949174, "grad_norm": 0.05158513975245155, "learning_rate": 0.00014311447582561923, "loss": 0.8398, "step": 106210 }, { "epoch": 1.8648501553749188, "grad_norm": 0.04893682102015972, "learning_rate": 0.00014310439050414746, "loss": 0.8494, "step": 106220 }, { "epoch": 1.8650257202549203, "grad_norm": 0.052089546400028115, "learning_rate": 0.00014309430464920497, "loss": 0.8502, "step": 106230 }, { "epoch": 1.8652012851349216, "grad_norm": 0.06148195231914974, "learning_rate": 0.00014308421826091962, "loss": 0.8563, "step": 106240 }, { "epoch": 1.865376850014923, "grad_norm": 0.07035013334097538, "learning_rate": 0.0001430741313394191, "loss": 0.8446, "step": 106250 }, { "epoch": 1.8655524148949243, "grad_norm": 0.06553126762720474, "learning_rate": 0.0001430640438848313, "loss": 0.8455, "step": 106260 }, { "epoch": 1.8657279797749258, "grad_norm": 0.056809769280939004, "learning_rate": 0.00014305395589728402, "loss": 0.8485, "step": 106270 }, { "epoch": 1.8659035446549272, "grad_norm": 0.08196862181374277, "learning_rate": 0.00014304386737690506, "loss": 0.8507, "step": 106280 }, { "epoch": 1.8660791095349287, "grad_norm": 0.05724522921159814, "learning_rate": 0.0001430337783238222, "loss": 0.8445, "step": 106290 }, { "epoch": 1.8662546744149302, "grad_norm": 0.0639744094990267, "learning_rate": 0.00014302368873816336, "loss": 0.8386, "step": 106300 }, { "epoch": 1.8664302392949315, "grad_norm": 0.05048336654108011, "learning_rate": 0.0001430135986200563, "loss": 0.8478, "step": 106310 }, { "epoch": 1.8666058041749327, "grad_norm": 0.07035333767761513, "learning_rate": 0.00014300350796962892, "loss": 0.8491, "step": 106320 }, { "epoch": 1.8667813690549342, "grad_norm": 0.06906543606468618, "learning_rate": 0.00014299341678700904, "loss": 0.8493, "step": 106330 }, { "epoch": 1.8669569339349357, "grad_norm": 0.07519086704381173, "learning_rate": 0.00014298332507232448, "loss": 0.8485, "step": 106340 }, { "epoch": 1.8671324988149371, "grad_norm": 0.05427978819979762, "learning_rate": 0.00014297323282570318, "loss": 0.8526, "step": 106350 }, { "epoch": 1.8673080636949386, "grad_norm": 0.05731549885886311, "learning_rate": 0.00014296314004727298, "loss": 0.8396, "step": 106360 }, { "epoch": 1.8674836285749399, "grad_norm": 0.06107116922718381, "learning_rate": 0.00014295304673716172, "loss": 0.8412, "step": 106370 }, { "epoch": 1.8676591934549411, "grad_norm": 0.04861769694047454, "learning_rate": 0.00014294295289549739, "loss": 0.8459, "step": 106380 }, { "epoch": 1.8678347583349426, "grad_norm": 0.06375853880432544, "learning_rate": 0.00014293285852240773, "loss": 0.8483, "step": 106390 }, { "epoch": 1.868010323214944, "grad_norm": 0.05217875770225602, "learning_rate": 0.00014292276361802076, "loss": 0.841, "step": 106400 }, { "epoch": 1.8681858880949456, "grad_norm": 0.05078627976329698, "learning_rate": 0.0001429126681824643, "loss": 0.8451, "step": 106410 }, { "epoch": 1.868361452974947, "grad_norm": 0.06910009442831427, "learning_rate": 0.00014290257221586633, "loss": 0.844, "step": 106420 }, { "epoch": 1.8685370178549483, "grad_norm": 0.06905871657021134, "learning_rate": 0.00014289247571835475, "loss": 0.8464, "step": 106430 }, { "epoch": 1.8687125827349496, "grad_norm": 0.07317570828928004, "learning_rate": 0.00014288237869005744, "loss": 0.8488, "step": 106440 }, { "epoch": 1.868888147614951, "grad_norm": 0.11072606347813466, "learning_rate": 0.00014287228113110242, "loss": 0.8491, "step": 106450 }, { "epoch": 1.8690637124949525, "grad_norm": 0.06054434908992929, "learning_rate": 0.0001428621830416175, "loss": 0.8421, "step": 106460 }, { "epoch": 1.869239277374954, "grad_norm": 0.06629311997281169, "learning_rate": 0.00014285208442173076, "loss": 0.8422, "step": 106470 }, { "epoch": 1.8694148422549555, "grad_norm": 0.058903305878965063, "learning_rate": 0.00014284198527157008, "loss": 0.8549, "step": 106480 }, { "epoch": 1.8695904071349567, "grad_norm": 0.04849563544425149, "learning_rate": 0.0001428318855912634, "loss": 0.8465, "step": 106490 }, { "epoch": 1.869765972014958, "grad_norm": 0.057268247192382646, "learning_rate": 0.00014282178538093876, "loss": 0.8444, "step": 106500 }, { "epoch": 1.8699415368949595, "grad_norm": 0.05744425080670455, "learning_rate": 0.00014281168464072404, "loss": 0.8531, "step": 106510 }, { "epoch": 1.870117101774961, "grad_norm": 0.059870508191846604, "learning_rate": 0.00014280158337074726, "loss": 0.8362, "step": 106520 }, { "epoch": 1.8702926666549624, "grad_norm": 0.05860086733234563, "learning_rate": 0.00014279148157113645, "loss": 0.8469, "step": 106530 }, { "epoch": 1.8704682315349639, "grad_norm": 0.05742856494832758, "learning_rate": 0.00014278137924201954, "loss": 0.8434, "step": 106540 }, { "epoch": 1.8706437964149651, "grad_norm": 0.08241862264768168, "learning_rate": 0.00014277127638352453, "loss": 0.858, "step": 106550 }, { "epoch": 1.8708193612949664, "grad_norm": 0.05187892899933325, "learning_rate": 0.00014276117299577942, "loss": 0.8488, "step": 106560 }, { "epoch": 1.8709949261749679, "grad_norm": 0.05221914509180794, "learning_rate": 0.00014275106907891228, "loss": 0.8551, "step": 106570 }, { "epoch": 1.8711704910549694, "grad_norm": 0.06941507300408725, "learning_rate": 0.00014274096463305112, "loss": 0.8482, "step": 106580 }, { "epoch": 1.8713460559349708, "grad_norm": 0.06786575612933661, "learning_rate": 0.0001427308596583239, "loss": 0.8448, "step": 106590 }, { "epoch": 1.8715216208149723, "grad_norm": 0.03994729824373997, "learning_rate": 0.0001427207541548587, "loss": 0.852, "step": 106600 }, { "epoch": 1.8716971856949736, "grad_norm": 0.04275986311730804, "learning_rate": 0.00014271064812278354, "loss": 0.8437, "step": 106610 }, { "epoch": 1.8718727505749748, "grad_norm": 0.06467919332615514, "learning_rate": 0.0001427005415622265, "loss": 0.8492, "step": 106620 }, { "epoch": 1.8720483154549763, "grad_norm": 0.08777611258016828, "learning_rate": 0.0001426904344733156, "loss": 0.8414, "step": 106630 }, { "epoch": 1.8722238803349778, "grad_norm": 0.06770187452825405, "learning_rate": 0.00014268032685617888, "loss": 0.842, "step": 106640 }, { "epoch": 1.8723994452149793, "grad_norm": 0.12117309276788532, "learning_rate": 0.00014267021871094444, "loss": 0.8438, "step": 106650 }, { "epoch": 1.8725750100949807, "grad_norm": 0.05409034649371126, "learning_rate": 0.00014266011003774033, "loss": 0.8386, "step": 106660 }, { "epoch": 1.872750574974982, "grad_norm": 0.06100023461840248, "learning_rate": 0.00014265000083669467, "loss": 0.8481, "step": 106670 }, { "epoch": 1.8729261398549835, "grad_norm": 0.07236629086388514, "learning_rate": 0.00014263989110793553, "loss": 0.8512, "step": 106680 }, { "epoch": 1.8731017047349847, "grad_norm": 0.05009996645943287, "learning_rate": 0.00014262978085159095, "loss": 0.8489, "step": 106690 }, { "epoch": 1.8732772696149862, "grad_norm": 0.052623287048906474, "learning_rate": 0.0001426196700677891, "loss": 0.8503, "step": 106700 }, { "epoch": 1.8734528344949877, "grad_norm": 0.06621136824025649, "learning_rate": 0.00014260955875665802, "loss": 0.8499, "step": 106710 }, { "epoch": 1.8736283993749892, "grad_norm": 0.05201835732314325, "learning_rate": 0.00014259944691832587, "loss": 0.8472, "step": 106720 }, { "epoch": 1.8738039642549904, "grad_norm": 0.058640741103336884, "learning_rate": 0.00014258933455292075, "loss": 0.8449, "step": 106730 }, { "epoch": 1.8739795291349919, "grad_norm": 0.05207213865743045, "learning_rate": 0.0001425792216605708, "loss": 0.8492, "step": 106740 }, { "epoch": 1.8741550940149931, "grad_norm": 0.07606162917623273, "learning_rate": 0.00014256910824140414, "loss": 0.8476, "step": 106750 }, { "epoch": 1.8743306588949946, "grad_norm": 0.0501523854528693, "learning_rate": 0.00014255899429554893, "loss": 0.8477, "step": 106760 }, { "epoch": 1.874506223774996, "grad_norm": 0.06521310385406806, "learning_rate": 0.00014254887982313328, "loss": 0.844, "step": 106770 }, { "epoch": 1.8746817886549976, "grad_norm": 0.05710473299849508, "learning_rate": 0.00014253876482428536, "loss": 0.8368, "step": 106780 }, { "epoch": 1.8748573535349988, "grad_norm": 0.05649736123456589, "learning_rate": 0.00014252864929913334, "loss": 0.8472, "step": 106790 }, { "epoch": 1.8750329184150003, "grad_norm": 0.051546050644041955, "learning_rate": 0.00014251853324780535, "loss": 0.8429, "step": 106800 }, { "epoch": 1.8752084832950016, "grad_norm": 0.055776196820393996, "learning_rate": 0.00014250841667042962, "loss": 0.8404, "step": 106810 }, { "epoch": 1.875384048175003, "grad_norm": 0.05447227103761869, "learning_rate": 0.0001424982995671343, "loss": 0.8468, "step": 106820 }, { "epoch": 1.8755596130550045, "grad_norm": 0.06321192990996816, "learning_rate": 0.00014248818193804752, "loss": 0.843, "step": 106830 }, { "epoch": 1.875735177935006, "grad_norm": 0.051129771131792214, "learning_rate": 0.00014247806378329755, "loss": 0.8431, "step": 106840 }, { "epoch": 1.8759107428150072, "grad_norm": 0.05128504330676382, "learning_rate": 0.00014246794510301258, "loss": 0.8467, "step": 106850 }, { "epoch": 1.8760863076950087, "grad_norm": 0.04762875475581306, "learning_rate": 0.00014245782589732077, "loss": 0.8403, "step": 106860 }, { "epoch": 1.87626187257501, "grad_norm": 0.061494135503067975, "learning_rate": 0.00014244770616635038, "loss": 0.8428, "step": 106870 }, { "epoch": 1.8764374374550115, "grad_norm": 0.08505308630518457, "learning_rate": 0.00014243758591022963, "loss": 0.8449, "step": 106880 }, { "epoch": 1.876613002335013, "grad_norm": 0.055887250062141805, "learning_rate": 0.00014242746512908668, "loss": 0.8493, "step": 106890 }, { "epoch": 1.8767885672150144, "grad_norm": 0.06941012518619032, "learning_rate": 0.00014241734382304983, "loss": 0.8419, "step": 106900 }, { "epoch": 1.8769641320950157, "grad_norm": 0.05062300614049672, "learning_rate": 0.0001424072219922473, "loss": 0.8534, "step": 106910 }, { "epoch": 1.8771396969750171, "grad_norm": 0.04685103142956085, "learning_rate": 0.00014239709963680732, "loss": 0.8428, "step": 106920 }, { "epoch": 1.8773152618550184, "grad_norm": 0.056837822228893164, "learning_rate": 0.00014238697675685814, "loss": 0.8555, "step": 106930 }, { "epoch": 1.8774908267350199, "grad_norm": 0.07059317980987856, "learning_rate": 0.00014237685335252807, "loss": 0.8632, "step": 106940 }, { "epoch": 1.8776663916150214, "grad_norm": 0.05713618580226438, "learning_rate": 0.0001423667294239453, "loss": 0.8431, "step": 106950 }, { "epoch": 1.8778419564950228, "grad_norm": 0.061105083315512385, "learning_rate": 0.00014235660497123818, "loss": 0.8536, "step": 106960 }, { "epoch": 1.878017521375024, "grad_norm": 0.05931098888587571, "learning_rate": 0.00014234647999453493, "loss": 0.8523, "step": 106970 }, { "epoch": 1.8781930862550256, "grad_norm": 0.06097482247807706, "learning_rate": 0.00014233635449396382, "loss": 0.842, "step": 106980 }, { "epoch": 1.8783686511350268, "grad_norm": 0.047817585078135334, "learning_rate": 0.00014232622846965324, "loss": 0.8465, "step": 106990 }, { "epoch": 1.8785442160150283, "grad_norm": 0.04086416968896691, "learning_rate": 0.0001423161019217314, "loss": 0.8465, "step": 107000 }, { "epoch": 1.8787197808950298, "grad_norm": 0.06999391716564102, "learning_rate": 0.00014230597485032662, "loss": 0.8457, "step": 107010 }, { "epoch": 1.8788953457750313, "grad_norm": 0.06261146237952987, "learning_rate": 0.0001422958472555672, "loss": 0.8485, "step": 107020 }, { "epoch": 1.8790709106550327, "grad_norm": 0.06653264978714801, "learning_rate": 0.0001422857191375815, "loss": 0.8448, "step": 107030 }, { "epoch": 1.879246475535034, "grad_norm": 0.06419026542779141, "learning_rate": 0.00014227559049649787, "loss": 0.8491, "step": 107040 }, { "epoch": 1.8794220404150352, "grad_norm": 0.07086030047204199, "learning_rate": 0.00014226546133244453, "loss": 0.8527, "step": 107050 }, { "epoch": 1.8795976052950367, "grad_norm": 0.04884721304963423, "learning_rate": 0.00014225533164554993, "loss": 0.8412, "step": 107060 }, { "epoch": 1.8797731701750382, "grad_norm": 0.05047404509011464, "learning_rate": 0.00014224520143594235, "loss": 0.8569, "step": 107070 }, { "epoch": 1.8799487350550397, "grad_norm": 0.050915546279555954, "learning_rate": 0.0001422350707037502, "loss": 0.8449, "step": 107080 }, { "epoch": 1.8801242999350412, "grad_norm": 0.08279230265072478, "learning_rate": 0.00014222493944910178, "loss": 0.8499, "step": 107090 }, { "epoch": 1.8802998648150424, "grad_norm": 0.05979296312296291, "learning_rate": 0.00014221480767212546, "loss": 0.8421, "step": 107100 }, { "epoch": 1.8804754296950437, "grad_norm": 0.05725569416508, "learning_rate": 0.00014220467537294965, "loss": 0.851, "step": 107110 }, { "epoch": 1.8806509945750451, "grad_norm": 0.056874237072009635, "learning_rate": 0.00014219454255170268, "loss": 0.8469, "step": 107120 }, { "epoch": 1.8808265594550466, "grad_norm": 0.07656319910571784, "learning_rate": 0.00014218440920851297, "loss": 0.8541, "step": 107130 }, { "epoch": 1.881002124335048, "grad_norm": 0.06474535908425746, "learning_rate": 0.00014217427534350888, "loss": 0.8464, "step": 107140 }, { "epoch": 1.8811776892150496, "grad_norm": 0.06186571706960451, "learning_rate": 0.00014216414095681888, "loss": 0.8485, "step": 107150 }, { "epoch": 1.8813532540950508, "grad_norm": 0.07320283249497198, "learning_rate": 0.0001421540060485713, "loss": 0.848, "step": 107160 }, { "epoch": 1.881528818975052, "grad_norm": 0.0730864233251961, "learning_rate": 0.00014214387061889462, "loss": 0.8406, "step": 107170 }, { "epoch": 1.8817043838550536, "grad_norm": 0.05476893690260848, "learning_rate": 0.00014213373466791718, "loss": 0.8542, "step": 107180 }, { "epoch": 1.881879948735055, "grad_norm": 0.07025244372421353, "learning_rate": 0.0001421235981957674, "loss": 0.8567, "step": 107190 }, { "epoch": 1.8820555136150565, "grad_norm": 0.05379338310814346, "learning_rate": 0.0001421134612025738, "loss": 0.8475, "step": 107200 }, { "epoch": 1.882231078495058, "grad_norm": 0.04722433418647219, "learning_rate": 0.00014210332368846476, "loss": 0.8412, "step": 107210 }, { "epoch": 1.8824066433750593, "grad_norm": 0.05767838207549319, "learning_rate": 0.0001420931856535687, "loss": 0.8473, "step": 107220 }, { "epoch": 1.8825822082550605, "grad_norm": 0.07112488756503287, "learning_rate": 0.00014208304709801414, "loss": 0.8499, "step": 107230 }, { "epoch": 1.882757773135062, "grad_norm": 0.06744766689324587, "learning_rate": 0.00014207290802192946, "loss": 0.8468, "step": 107240 }, { "epoch": 1.8829333380150635, "grad_norm": 0.05413876776208277, "learning_rate": 0.0001420627684254432, "loss": 0.8576, "step": 107250 }, { "epoch": 1.883108902895065, "grad_norm": 0.07420114260325573, "learning_rate": 0.0001420526283086838, "loss": 0.8486, "step": 107260 }, { "epoch": 1.8832844677750664, "grad_norm": 0.04801823388160254, "learning_rate": 0.0001420424876717797, "loss": 0.8505, "step": 107270 }, { "epoch": 1.8834600326550677, "grad_norm": 0.07221812508160749, "learning_rate": 0.00014203234651485945, "loss": 0.8433, "step": 107280 }, { "epoch": 1.883635597535069, "grad_norm": 0.04684675881877558, "learning_rate": 0.00014202220483805144, "loss": 0.8436, "step": 107290 }, { "epoch": 1.8838111624150704, "grad_norm": 0.062126216312158436, "learning_rate": 0.0001420120626414843, "loss": 0.8407, "step": 107300 }, { "epoch": 1.8839867272950719, "grad_norm": 0.059683336107710615, "learning_rate": 0.00014200191992528644, "loss": 0.8437, "step": 107310 }, { "epoch": 1.8841622921750734, "grad_norm": 0.049621776165936916, "learning_rate": 0.0001419917766895864, "loss": 0.847, "step": 107320 }, { "epoch": 1.8843378570550748, "grad_norm": 0.054866762711107776, "learning_rate": 0.00014198163293451272, "loss": 0.8453, "step": 107330 }, { "epoch": 1.884513421935076, "grad_norm": 0.06883726828216678, "learning_rate": 0.00014197148866019387, "loss": 0.8547, "step": 107340 }, { "epoch": 1.8846889868150773, "grad_norm": 0.05777454013858833, "learning_rate": 0.00014196134386675838, "loss": 0.8466, "step": 107350 }, { "epoch": 1.8848645516950788, "grad_norm": 0.04714107499095379, "learning_rate": 0.00014195119855433484, "loss": 0.8469, "step": 107360 }, { "epoch": 1.8850401165750803, "grad_norm": 0.04822490221063911, "learning_rate": 0.00014194105272305178, "loss": 0.8461, "step": 107370 }, { "epoch": 1.8852156814550818, "grad_norm": 0.05755602217200438, "learning_rate": 0.00014193090637303773, "loss": 0.8526, "step": 107380 }, { "epoch": 1.8853912463350833, "grad_norm": 0.07767271905554261, "learning_rate": 0.00014192075950442124, "loss": 0.8425, "step": 107390 }, { "epoch": 1.8855668112150845, "grad_norm": 0.06326985499505673, "learning_rate": 0.00014191061211733086, "loss": 0.8396, "step": 107400 }, { "epoch": 1.885742376095086, "grad_norm": 0.06557530274851213, "learning_rate": 0.00014190046421189522, "loss": 0.8453, "step": 107410 }, { "epoch": 1.8859179409750872, "grad_norm": 0.05094212131436509, "learning_rate": 0.00014189031578824284, "loss": 0.8506, "step": 107420 }, { "epoch": 1.8860935058550887, "grad_norm": 0.06670241020488299, "learning_rate": 0.00014188016684650237, "loss": 0.842, "step": 107430 }, { "epoch": 1.8862690707350902, "grad_norm": 0.07395802676178292, "learning_rate": 0.00014187001738680233, "loss": 0.8379, "step": 107440 }, { "epoch": 1.8864446356150917, "grad_norm": 0.0835616541873766, "learning_rate": 0.00014185986740927132, "loss": 0.8363, "step": 107450 }, { "epoch": 1.886620200495093, "grad_norm": 0.06240295038700513, "learning_rate": 0.00014184971691403792, "loss": 0.8469, "step": 107460 }, { "epoch": 1.8867957653750944, "grad_norm": 0.12309507692733078, "learning_rate": 0.00014183956590123085, "loss": 0.8418, "step": 107470 }, { "epoch": 1.8869713302550957, "grad_norm": 0.0643614759255706, "learning_rate": 0.0001418294143709786, "loss": 0.8444, "step": 107480 }, { "epoch": 1.8871468951350971, "grad_norm": 0.055088249613504475, "learning_rate": 0.00014181926232340988, "loss": 0.8469, "step": 107490 }, { "epoch": 1.8873224600150986, "grad_norm": 0.060458193505361635, "learning_rate": 0.0001418091097586533, "loss": 0.8358, "step": 107500 }, { "epoch": 1.8874980248951, "grad_norm": 0.06231488983709582, "learning_rate": 0.0001417989566768374, "loss": 0.8451, "step": 107510 }, { "epoch": 1.8876735897751014, "grad_norm": 0.07789856960182835, "learning_rate": 0.00014178880307809096, "loss": 0.8454, "step": 107520 }, { "epoch": 1.8878491546551028, "grad_norm": 0.07616474544878739, "learning_rate": 0.00014177864896254254, "loss": 0.8547, "step": 107530 }, { "epoch": 1.888024719535104, "grad_norm": 0.05300663868160519, "learning_rate": 0.00014176849433032086, "loss": 0.8516, "step": 107540 }, { "epoch": 1.8882002844151056, "grad_norm": 0.053665085414996046, "learning_rate": 0.00014175833918155455, "loss": 0.8469, "step": 107550 }, { "epoch": 1.888375849295107, "grad_norm": 0.04924250682414715, "learning_rate": 0.00014174818351637224, "loss": 0.8574, "step": 107560 }, { "epoch": 1.8885514141751085, "grad_norm": 0.06290635314423378, "learning_rate": 0.00014173802733490263, "loss": 0.8451, "step": 107570 }, { "epoch": 1.8887269790551098, "grad_norm": 0.06732807430868974, "learning_rate": 0.00014172787063727443, "loss": 0.847, "step": 107580 }, { "epoch": 1.8889025439351113, "grad_norm": 0.05172818029006134, "learning_rate": 0.00014171771342361627, "loss": 0.8516, "step": 107590 }, { "epoch": 1.8890781088151125, "grad_norm": 0.06351186401231626, "learning_rate": 0.0001417075556940569, "loss": 0.8422, "step": 107600 }, { "epoch": 1.889253673695114, "grad_norm": 0.057293301440097075, "learning_rate": 0.000141697397448725, "loss": 0.8478, "step": 107610 }, { "epoch": 1.8894292385751155, "grad_norm": 0.0673215092221544, "learning_rate": 0.00014168723868774927, "loss": 0.8395, "step": 107620 }, { "epoch": 1.889604803455117, "grad_norm": 0.04823121354755642, "learning_rate": 0.0001416770794112584, "loss": 0.8482, "step": 107630 }, { "epoch": 1.8897803683351182, "grad_norm": 0.059311494600987495, "learning_rate": 0.0001416669196193812, "loss": 0.8441, "step": 107640 }, { "epoch": 1.8899559332151197, "grad_norm": 0.0840162613643754, "learning_rate": 0.00014165675931224626, "loss": 0.8499, "step": 107650 }, { "epoch": 1.890131498095121, "grad_norm": 0.048877394575244884, "learning_rate": 0.00014164659848998247, "loss": 0.8539, "step": 107660 }, { "epoch": 1.8903070629751224, "grad_norm": 0.061874923136603706, "learning_rate": 0.00014163643715271843, "loss": 0.8429, "step": 107670 }, { "epoch": 1.8904826278551239, "grad_norm": 0.05633089843058496, "learning_rate": 0.00014162627530058298, "loss": 0.8324, "step": 107680 }, { "epoch": 1.8906581927351254, "grad_norm": 0.05115625617394214, "learning_rate": 0.0001416161129337048, "loss": 0.8472, "step": 107690 }, { "epoch": 1.8908337576151266, "grad_norm": 0.06915576366332989, "learning_rate": 0.00014160595005221268, "loss": 0.8471, "step": 107700 }, { "epoch": 1.891009322495128, "grad_norm": 0.05039608585626326, "learning_rate": 0.00014159578665623543, "loss": 0.8472, "step": 107710 }, { "epoch": 1.8911848873751294, "grad_norm": 0.05418986849129086, "learning_rate": 0.00014158562274590178, "loss": 0.8493, "step": 107720 }, { "epoch": 1.8913604522551308, "grad_norm": 0.0637601971100921, "learning_rate": 0.0001415754583213405, "loss": 0.8464, "step": 107730 }, { "epoch": 1.8915360171351323, "grad_norm": 0.0520977026215685, "learning_rate": 0.00014156529338268036, "loss": 0.8418, "step": 107740 }, { "epoch": 1.8917115820151338, "grad_norm": 0.048837147787480155, "learning_rate": 0.0001415551279300502, "loss": 0.8474, "step": 107750 }, { "epoch": 1.8918871468951353, "grad_norm": 0.06491819592977356, "learning_rate": 0.0001415449619635788, "loss": 0.8531, "step": 107760 }, { "epoch": 1.8920627117751365, "grad_norm": 0.04781228622129684, "learning_rate": 0.00014153479548339497, "loss": 0.844, "step": 107770 }, { "epoch": 1.8922382766551378, "grad_norm": 0.0670408520734645, "learning_rate": 0.0001415246284896275, "loss": 0.8562, "step": 107780 }, { "epoch": 1.8924138415351393, "grad_norm": 0.05855727320240304, "learning_rate": 0.00014151446098240522, "loss": 0.8405, "step": 107790 }, { "epoch": 1.8925894064151407, "grad_norm": 0.05332822163093553, "learning_rate": 0.00014150429296185696, "loss": 0.8566, "step": 107800 }, { "epoch": 1.8927649712951422, "grad_norm": 0.04945567835346476, "learning_rate": 0.00014149412442811154, "loss": 0.8525, "step": 107810 }, { "epoch": 1.8929405361751437, "grad_norm": 0.053944223908817014, "learning_rate": 0.00014148395538129778, "loss": 0.8512, "step": 107820 }, { "epoch": 1.893116101055145, "grad_norm": 0.048007295765710396, "learning_rate": 0.00014147378582154461, "loss": 0.8452, "step": 107830 }, { "epoch": 1.8932916659351462, "grad_norm": 0.07807557004147252, "learning_rate": 0.00014146361574898076, "loss": 0.8347, "step": 107840 }, { "epoch": 1.8934672308151477, "grad_norm": 0.05084074038597083, "learning_rate": 0.00014145344516373518, "loss": 0.8514, "step": 107850 }, { "epoch": 1.8936427956951492, "grad_norm": 0.06734713327522247, "learning_rate": 0.00014144327406593666, "loss": 0.8482, "step": 107860 }, { "epoch": 1.8938183605751506, "grad_norm": 0.07760737518094328, "learning_rate": 0.00014143310245571413, "loss": 0.8449, "step": 107870 }, { "epoch": 1.893993925455152, "grad_norm": 0.06337041605995325, "learning_rate": 0.00014142293033319646, "loss": 0.8447, "step": 107880 }, { "epoch": 1.8941694903351534, "grad_norm": 0.06403281990195644, "learning_rate": 0.0001414127576985125, "loss": 0.8536, "step": 107890 }, { "epoch": 1.8943450552151546, "grad_norm": 0.08533104214737414, "learning_rate": 0.0001414025845517911, "loss": 0.8496, "step": 107900 }, { "epoch": 1.894520620095156, "grad_norm": 0.05500028585551226, "learning_rate": 0.0001413924108931613, "loss": 0.8516, "step": 107910 }, { "epoch": 1.8946961849751576, "grad_norm": 0.048955904620565614, "learning_rate": 0.00014138223672275187, "loss": 0.8458, "step": 107920 }, { "epoch": 1.894871749855159, "grad_norm": 0.05491596207331266, "learning_rate": 0.00014137206204069174, "loss": 0.8467, "step": 107930 }, { "epoch": 1.8950473147351605, "grad_norm": 0.05260274246286499, "learning_rate": 0.0001413618868471099, "loss": 0.8522, "step": 107940 }, { "epoch": 1.8952228796151618, "grad_norm": 0.04268190440183228, "learning_rate": 0.00014135171114213518, "loss": 0.8492, "step": 107950 }, { "epoch": 1.895398444495163, "grad_norm": 0.052742469417618634, "learning_rate": 0.00014134153492589656, "loss": 0.8455, "step": 107960 }, { "epoch": 1.8955740093751645, "grad_norm": 0.051877583604409236, "learning_rate": 0.00014133135819852295, "loss": 0.8534, "step": 107970 }, { "epoch": 1.895749574255166, "grad_norm": 0.06891095149725714, "learning_rate": 0.0001413211809601433, "loss": 0.8481, "step": 107980 }, { "epoch": 1.8959251391351675, "grad_norm": 0.06327544022969567, "learning_rate": 0.00014131100321088658, "loss": 0.8443, "step": 107990 }, { "epoch": 1.896100704015169, "grad_norm": 0.06886207825649504, "learning_rate": 0.00014130082495088171, "loss": 0.8393, "step": 108000 }, { "epoch": 1.8962762688951702, "grad_norm": 0.04671398416622909, "learning_rate": 0.00014129064618025768, "loss": 0.8508, "step": 108010 }, { "epoch": 1.8964518337751715, "grad_norm": 0.05240672431194136, "learning_rate": 0.0001412804668991434, "loss": 0.8425, "step": 108020 }, { "epoch": 1.896627398655173, "grad_norm": 0.07130189768631692, "learning_rate": 0.00014127028710766794, "loss": 0.8539, "step": 108030 }, { "epoch": 1.8968029635351744, "grad_norm": 0.05013359050011324, "learning_rate": 0.00014126010680596017, "loss": 0.8518, "step": 108040 }, { "epoch": 1.896978528415176, "grad_norm": 0.06674445799024864, "learning_rate": 0.00014124992599414917, "loss": 0.8572, "step": 108050 }, { "epoch": 1.8971540932951774, "grad_norm": 0.08033225024067958, "learning_rate": 0.0001412397446723639, "loss": 0.8462, "step": 108060 }, { "epoch": 1.8973296581751786, "grad_norm": 0.047149167130092304, "learning_rate": 0.00014122956284073332, "loss": 0.8437, "step": 108070 }, { "epoch": 1.8975052230551799, "grad_norm": 0.06644263957529475, "learning_rate": 0.0001412193804993865, "loss": 0.8456, "step": 108080 }, { "epoch": 1.8976807879351814, "grad_norm": 0.10469896484280211, "learning_rate": 0.00014120919764845237, "loss": 0.8428, "step": 108090 }, { "epoch": 1.8978563528151828, "grad_norm": 0.05441651620206813, "learning_rate": 0.00014119901428806008, "loss": 0.841, "step": 108100 }, { "epoch": 1.8980319176951843, "grad_norm": 0.0629302947625853, "learning_rate": 0.0001411888304183385, "loss": 0.8524, "step": 108110 }, { "epoch": 1.8982074825751858, "grad_norm": 0.09052496886701328, "learning_rate": 0.00014117864603941678, "loss": 0.8483, "step": 108120 }, { "epoch": 1.898383047455187, "grad_norm": 0.046545689701206365, "learning_rate": 0.0001411684611514239, "loss": 0.8473, "step": 108130 }, { "epoch": 1.8985586123351885, "grad_norm": 0.05506670920975157, "learning_rate": 0.0001411582757544889, "loss": 0.8466, "step": 108140 }, { "epoch": 1.8987341772151898, "grad_norm": 0.07989729092275612, "learning_rate": 0.0001411480898487409, "loss": 0.8434, "step": 108150 }, { "epoch": 1.8989097420951913, "grad_norm": 0.04895512959265241, "learning_rate": 0.00014113790343430886, "loss": 0.8444, "step": 108160 }, { "epoch": 1.8990853069751927, "grad_norm": 0.05815092475242129, "learning_rate": 0.0001411277165113219, "loss": 0.8472, "step": 108170 }, { "epoch": 1.8992608718551942, "grad_norm": 0.05642132449511782, "learning_rate": 0.00014111752907990913, "loss": 0.8437, "step": 108180 }, { "epoch": 1.8994364367351955, "grad_norm": 0.06242399528410662, "learning_rate": 0.0001411073411401995, "loss": 0.8462, "step": 108190 }, { "epoch": 1.899612001615197, "grad_norm": 0.06411775789754191, "learning_rate": 0.00014109715269232221, "loss": 0.8507, "step": 108200 }, { "epoch": 1.8997875664951982, "grad_norm": 0.05989055344931941, "learning_rate": 0.00014108696373640634, "loss": 0.849, "step": 108210 }, { "epoch": 1.8999631313751997, "grad_norm": 0.04999944208962908, "learning_rate": 0.0001410767742725809, "loss": 0.8454, "step": 108220 }, { "epoch": 1.9001386962552012, "grad_norm": 0.061125782778037596, "learning_rate": 0.00014106658430097508, "loss": 0.8453, "step": 108230 }, { "epoch": 1.9003142611352026, "grad_norm": 0.058493903619202384, "learning_rate": 0.00014105639382171797, "loss": 0.8409, "step": 108240 }, { "epoch": 1.9004898260152039, "grad_norm": 0.05465425728049224, "learning_rate": 0.00014104620283493866, "loss": 0.8435, "step": 108250 }, { "epoch": 1.9006653908952054, "grad_norm": 0.0752546653543732, "learning_rate": 0.00014103601134076625, "loss": 0.838, "step": 108260 }, { "epoch": 1.9008409557752066, "grad_norm": 0.05950518474494803, "learning_rate": 0.00014102581933932994, "loss": 0.8495, "step": 108270 }, { "epoch": 1.901016520655208, "grad_norm": 0.08677785983640798, "learning_rate": 0.00014101562683075885, "loss": 0.8475, "step": 108280 }, { "epoch": 1.9011920855352096, "grad_norm": 0.056737299995773274, "learning_rate": 0.00014100543381518208, "loss": 0.8484, "step": 108290 }, { "epoch": 1.901367650415211, "grad_norm": 0.06150333002641156, "learning_rate": 0.00014099524029272876, "loss": 0.8406, "step": 108300 }, { "epoch": 1.9015432152952123, "grad_norm": 0.055500822469179395, "learning_rate": 0.0001409850462635281, "loss": 0.8538, "step": 108310 }, { "epoch": 1.9017187801752138, "grad_norm": 0.06849014010795729, "learning_rate": 0.00014097485172770929, "loss": 0.8523, "step": 108320 }, { "epoch": 1.901894345055215, "grad_norm": 0.06510865282463428, "learning_rate": 0.00014096465668540138, "loss": 0.8409, "step": 108330 }, { "epoch": 1.9020699099352165, "grad_norm": 0.07215788809050788, "learning_rate": 0.00014095446113673366, "loss": 0.8532, "step": 108340 }, { "epoch": 1.902245474815218, "grad_norm": 0.04894762146029988, "learning_rate": 0.00014094426508183525, "loss": 0.8411, "step": 108350 }, { "epoch": 1.9024210396952195, "grad_norm": 0.06381695592213568, "learning_rate": 0.00014093406852083533, "loss": 0.8471, "step": 108360 }, { "epoch": 1.9025966045752207, "grad_norm": 0.0433647993516044, "learning_rate": 0.0001409238714538631, "loss": 0.8514, "step": 108370 }, { "epoch": 1.9027721694552222, "grad_norm": 0.04800673712762564, "learning_rate": 0.00014091367388104778, "loss": 0.843, "step": 108380 }, { "epoch": 1.9029477343352235, "grad_norm": 0.04452944757244378, "learning_rate": 0.00014090347580251856, "loss": 0.8403, "step": 108390 }, { "epoch": 1.903123299215225, "grad_norm": 0.07097054075556722, "learning_rate": 0.00014089327721840465, "loss": 0.8435, "step": 108400 }, { "epoch": 1.9032988640952264, "grad_norm": 0.05514961772378471, "learning_rate": 0.00014088307812883528, "loss": 0.8435, "step": 108410 }, { "epoch": 1.903474428975228, "grad_norm": 0.10057120002010742, "learning_rate": 0.0001408728785339397, "loss": 0.8486, "step": 108420 }, { "epoch": 1.9036499938552291, "grad_norm": 0.05689106817574387, "learning_rate": 0.00014086267843384703, "loss": 0.8476, "step": 108430 }, { "epoch": 1.9038255587352306, "grad_norm": 0.0622920908222074, "learning_rate": 0.00014085247782868665, "loss": 0.8379, "step": 108440 }, { "epoch": 1.9040011236152319, "grad_norm": 0.051952681743626196, "learning_rate": 0.0001408422767185877, "loss": 0.8422, "step": 108450 }, { "epoch": 1.9041766884952334, "grad_norm": 0.05840574954758984, "learning_rate": 0.00014083207510367954, "loss": 0.8416, "step": 108460 }, { "epoch": 1.9043522533752348, "grad_norm": 0.04790376409757451, "learning_rate": 0.00014082187298409125, "loss": 0.8452, "step": 108470 }, { "epoch": 1.9045278182552363, "grad_norm": 0.06876775747933524, "learning_rate": 0.00014081167035995224, "loss": 0.8447, "step": 108480 }, { "epoch": 1.9047033831352378, "grad_norm": 0.05461691620414574, "learning_rate": 0.00014080146723139176, "loss": 0.8426, "step": 108490 }, { "epoch": 1.904878948015239, "grad_norm": 0.05606552498528673, "learning_rate": 0.00014079126359853905, "loss": 0.8502, "step": 108500 }, { "epoch": 1.9050545128952403, "grad_norm": 0.04862585245920811, "learning_rate": 0.0001407810594615234, "loss": 0.8457, "step": 108510 }, { "epoch": 1.9052300777752418, "grad_norm": 0.04389575161882601, "learning_rate": 0.0001407708548204741, "loss": 0.8541, "step": 108520 }, { "epoch": 1.9054056426552433, "grad_norm": 0.059297975787292734, "learning_rate": 0.00014076064967552044, "loss": 0.8461, "step": 108530 }, { "epoch": 1.9055812075352447, "grad_norm": 0.05466549868710481, "learning_rate": 0.00014075044402679174, "loss": 0.8457, "step": 108540 }, { "epoch": 1.9057567724152462, "grad_norm": 0.051362282021734505, "learning_rate": 0.00014074023787441728, "loss": 0.8451, "step": 108550 }, { "epoch": 1.9059323372952475, "grad_norm": 0.06356141288763777, "learning_rate": 0.00014073003121852642, "loss": 0.8367, "step": 108560 }, { "epoch": 1.9061079021752487, "grad_norm": 0.05168308454784958, "learning_rate": 0.00014071982405924844, "loss": 0.8436, "step": 108570 }, { "epoch": 1.9062834670552502, "grad_norm": 0.049354065173214466, "learning_rate": 0.00014070961639671268, "loss": 0.8435, "step": 108580 }, { "epoch": 1.9064590319352517, "grad_norm": 0.059741485079001844, "learning_rate": 0.00014069940823104848, "loss": 0.842, "step": 108590 }, { "epoch": 1.9066345968152532, "grad_norm": 0.055434290797836454, "learning_rate": 0.0001406891995623851, "loss": 0.847, "step": 108600 }, { "epoch": 1.9068101616952546, "grad_norm": 0.050470466246593605, "learning_rate": 0.00014067899039085205, "loss": 0.8525, "step": 108610 }, { "epoch": 1.906985726575256, "grad_norm": 0.0550273245575837, "learning_rate": 0.00014066878071657853, "loss": 0.8437, "step": 108620 }, { "epoch": 1.9071612914552571, "grad_norm": 0.07000598880629592, "learning_rate": 0.000140658570539694, "loss": 0.8449, "step": 108630 }, { "epoch": 1.9073368563352586, "grad_norm": 0.06665386800025123, "learning_rate": 0.0001406483598603277, "loss": 0.8448, "step": 108640 }, { "epoch": 1.90751242121526, "grad_norm": 0.05739585879047912, "learning_rate": 0.00014063814867860914, "loss": 0.8378, "step": 108650 }, { "epoch": 1.9076879860952616, "grad_norm": 0.07393926304853243, "learning_rate": 0.00014062793699466764, "loss": 0.8458, "step": 108660 }, { "epoch": 1.907863550975263, "grad_norm": 0.04636745848572456, "learning_rate": 0.00014061772480863257, "loss": 0.8471, "step": 108670 }, { "epoch": 1.9080391158552643, "grad_norm": 0.056886732161853075, "learning_rate": 0.00014060751212063334, "loss": 0.8418, "step": 108680 }, { "epoch": 1.9082146807352656, "grad_norm": 0.06010053147826944, "learning_rate": 0.00014059729893079937, "loss": 0.8459, "step": 108690 }, { "epoch": 1.908390245615267, "grad_norm": 0.05570348717273528, "learning_rate": 0.00014058708523925997, "loss": 0.8452, "step": 108700 }, { "epoch": 1.9085658104952685, "grad_norm": 0.043798744690329544, "learning_rate": 0.00014057687104614464, "loss": 0.8476, "step": 108710 }, { "epoch": 1.90874137537527, "grad_norm": 0.05674659432419544, "learning_rate": 0.00014056665635158277, "loss": 0.8356, "step": 108720 }, { "epoch": 1.9089169402552715, "grad_norm": 0.040027467941607435, "learning_rate": 0.0001405564411557038, "loss": 0.8473, "step": 108730 }, { "epoch": 1.9090925051352727, "grad_norm": 0.07320322339357423, "learning_rate": 0.00014054622545863713, "loss": 0.8512, "step": 108740 }, { "epoch": 1.909268070015274, "grad_norm": 0.05807409866479108, "learning_rate": 0.0001405360092605122, "loss": 0.8474, "step": 108750 }, { "epoch": 1.9094436348952755, "grad_norm": 0.06973971760343531, "learning_rate": 0.00014052579256145848, "loss": 0.853, "step": 108760 }, { "epoch": 1.909619199775277, "grad_norm": 0.06048710126583099, "learning_rate": 0.00014051557536160536, "loss": 0.8477, "step": 108770 }, { "epoch": 1.9097947646552784, "grad_norm": 0.06662035588948004, "learning_rate": 0.00014050535766108233, "loss": 0.8407, "step": 108780 }, { "epoch": 1.90997032953528, "grad_norm": 0.0566361794557259, "learning_rate": 0.00014049513946001887, "loss": 0.848, "step": 108790 }, { "epoch": 1.9101458944152812, "grad_norm": 0.048099938614052797, "learning_rate": 0.00014048492075854443, "loss": 0.851, "step": 108800 }, { "epoch": 1.9103214592952824, "grad_norm": 0.05825740718999886, "learning_rate": 0.00014047470155678844, "loss": 0.8431, "step": 108810 }, { "epoch": 1.9104970241752839, "grad_norm": 0.05128485400278828, "learning_rate": 0.00014046448185488038, "loss": 0.845, "step": 108820 }, { "epoch": 1.9106725890552854, "grad_norm": 0.062375680735361615, "learning_rate": 0.00014045426165294986, "loss": 0.8491, "step": 108830 }, { "epoch": 1.9108481539352868, "grad_norm": 0.0695809499700619, "learning_rate": 0.0001404440409511262, "loss": 0.8455, "step": 108840 }, { "epoch": 1.9110237188152883, "grad_norm": 0.04966323015519648, "learning_rate": 0.00014043381974953906, "loss": 0.8455, "step": 108850 }, { "epoch": 1.9111992836952896, "grad_norm": 0.0543653883322364, "learning_rate": 0.00014042359804831785, "loss": 0.8505, "step": 108860 }, { "epoch": 1.911374848575291, "grad_norm": 0.07059276273912259, "learning_rate": 0.00014041337584759208, "loss": 0.8448, "step": 108870 }, { "epoch": 1.9115504134552923, "grad_norm": 0.06050247972385024, "learning_rate": 0.0001404031531474913, "loss": 0.8402, "step": 108880 }, { "epoch": 1.9117259783352938, "grad_norm": 0.05192358779536679, "learning_rate": 0.00014039292994814497, "loss": 0.8478, "step": 108890 }, { "epoch": 1.9119015432152953, "grad_norm": 0.06754939331277159, "learning_rate": 0.00014038270624968272, "loss": 0.8405, "step": 108900 }, { "epoch": 1.9120771080952967, "grad_norm": 0.06729861697934697, "learning_rate": 0.000140372482052234, "loss": 0.849, "step": 108910 }, { "epoch": 1.912252672975298, "grad_norm": 0.057269650318284425, "learning_rate": 0.0001403622573559284, "loss": 0.8542, "step": 108920 }, { "epoch": 1.9124282378552995, "grad_norm": 0.06792431400727635, "learning_rate": 0.00014035203216089548, "loss": 0.8474, "step": 108930 }, { "epoch": 1.9126038027353007, "grad_norm": 0.05210475335255505, "learning_rate": 0.00014034180646726473, "loss": 0.8478, "step": 108940 }, { "epoch": 1.9127793676153022, "grad_norm": 0.07137871548764076, "learning_rate": 0.00014033158027516578, "loss": 0.8436, "step": 108950 }, { "epoch": 1.9129549324953037, "grad_norm": 0.0685170917587989, "learning_rate": 0.00014032135358472815, "loss": 0.846, "step": 108960 }, { "epoch": 1.9131304973753052, "grad_norm": 0.06395082215294239, "learning_rate": 0.00014031112639608146, "loss": 0.8449, "step": 108970 }, { "epoch": 1.9133060622553064, "grad_norm": 0.06472451947959022, "learning_rate": 0.00014030089870935525, "loss": 0.8457, "step": 108980 }, { "epoch": 1.913481627135308, "grad_norm": 0.06508392025049282, "learning_rate": 0.0001402906705246791, "loss": 0.8429, "step": 108990 }, { "epoch": 1.9136571920153091, "grad_norm": 0.0648303100569029, "learning_rate": 0.00014028044184218267, "loss": 0.8534, "step": 109000 }, { "epoch": 1.9138327568953106, "grad_norm": 0.049307810041276386, "learning_rate": 0.00014027021266199547, "loss": 0.8445, "step": 109010 }, { "epoch": 1.914008321775312, "grad_norm": 0.07176977930937767, "learning_rate": 0.0001402599829842472, "loss": 0.8471, "step": 109020 }, { "epoch": 1.9141838866553136, "grad_norm": 0.06177694039333116, "learning_rate": 0.00014024975280906737, "loss": 0.8456, "step": 109030 }, { "epoch": 1.9143594515353148, "grad_norm": 0.06999889748006373, "learning_rate": 0.00014023952213658566, "loss": 0.8465, "step": 109040 }, { "epoch": 1.9145350164153163, "grad_norm": 0.07429515754065107, "learning_rate": 0.00014022929096693172, "loss": 0.8484, "step": 109050 }, { "epoch": 1.9147105812953176, "grad_norm": 0.0644837563595987, "learning_rate": 0.0001402190593002351, "loss": 0.853, "step": 109060 }, { "epoch": 1.914886146175319, "grad_norm": 0.05921236745748017, "learning_rate": 0.0001402088271366255, "loss": 0.8456, "step": 109070 }, { "epoch": 1.9150617110553205, "grad_norm": 0.04656384317855633, "learning_rate": 0.00014019859447623255, "loss": 0.845, "step": 109080 }, { "epoch": 1.915237275935322, "grad_norm": 0.05206015243711889, "learning_rate": 0.00014018836131918588, "loss": 0.8481, "step": 109090 }, { "epoch": 1.9154128408153233, "grad_norm": 0.06351582137420657, "learning_rate": 0.0001401781276656152, "loss": 0.846, "step": 109100 }, { "epoch": 1.9155884056953247, "grad_norm": 0.07567837322953253, "learning_rate": 0.00014016789351565007, "loss": 0.8481, "step": 109110 }, { "epoch": 1.915763970575326, "grad_norm": 0.07370846910471293, "learning_rate": 0.0001401576588694203, "loss": 0.8477, "step": 109120 }, { "epoch": 1.9159395354553275, "grad_norm": 0.059045983361408004, "learning_rate": 0.00014014742372705545, "loss": 0.8473, "step": 109130 }, { "epoch": 1.916115100335329, "grad_norm": 0.08185170310352748, "learning_rate": 0.00014013718808868523, "loss": 0.8442, "step": 109140 }, { "epoch": 1.9162906652153304, "grad_norm": 0.0611465855854901, "learning_rate": 0.00014012695195443934, "loss": 0.8424, "step": 109150 }, { "epoch": 1.9164662300953317, "grad_norm": 0.06606384355049631, "learning_rate": 0.00014011671532444746, "loss": 0.8507, "step": 109160 }, { "epoch": 1.9166417949753332, "grad_norm": 0.04626462745905083, "learning_rate": 0.00014010647819883934, "loss": 0.8516, "step": 109170 }, { "epoch": 1.9168173598553344, "grad_norm": 0.07024736439805652, "learning_rate": 0.0001400962405777446, "loss": 0.8412, "step": 109180 }, { "epoch": 1.916992924735336, "grad_norm": 0.051064654704056994, "learning_rate": 0.00014008600246129304, "loss": 0.8413, "step": 109190 }, { "epoch": 1.9171684896153374, "grad_norm": 0.06400710338605019, "learning_rate": 0.00014007576384961434, "loss": 0.8434, "step": 109200 }, { "epoch": 1.9173440544953388, "grad_norm": 0.06078629904104021, "learning_rate": 0.0001400655247428382, "loss": 0.8412, "step": 109210 }, { "epoch": 1.9175196193753403, "grad_norm": 0.040037408112853556, "learning_rate": 0.00014005528514109437, "loss": 0.8474, "step": 109220 }, { "epoch": 1.9176951842553416, "grad_norm": 0.05394502441971714, "learning_rate": 0.00014004504504451257, "loss": 0.841, "step": 109230 }, { "epoch": 1.9178707491353428, "grad_norm": 0.056065289916888264, "learning_rate": 0.0001400348044532226, "loss": 0.8462, "step": 109240 }, { "epoch": 1.9180463140153443, "grad_norm": 0.05271756565695092, "learning_rate": 0.0001400245633673542, "loss": 0.8491, "step": 109250 }, { "epoch": 1.9182218788953458, "grad_norm": 0.05527894040234634, "learning_rate": 0.00014001432178703703, "loss": 0.8505, "step": 109260 }, { "epoch": 1.9183974437753473, "grad_norm": 0.05637605731152893, "learning_rate": 0.00014000407971240098, "loss": 0.85, "step": 109270 }, { "epoch": 1.9185730086553487, "grad_norm": 0.07000617243914982, "learning_rate": 0.00013999383714357575, "loss": 0.8491, "step": 109280 }, { "epoch": 1.91874857353535, "grad_norm": 0.05542083997295546, "learning_rate": 0.00013998359408069115, "loss": 0.8509, "step": 109290 }, { "epoch": 1.9189241384153513, "grad_norm": 0.045338515380453144, "learning_rate": 0.00013997335052387696, "loss": 0.8553, "step": 109300 }, { "epoch": 1.9190997032953527, "grad_norm": 0.06074213584416928, "learning_rate": 0.0001399631064732629, "loss": 0.8605, "step": 109310 }, { "epoch": 1.9192752681753542, "grad_norm": 0.06432284858839514, "learning_rate": 0.00013995286192897884, "loss": 0.8422, "step": 109320 }, { "epoch": 1.9194508330553557, "grad_norm": 0.05767675634166488, "learning_rate": 0.0001399426168911545, "loss": 0.8446, "step": 109330 }, { "epoch": 1.9196263979353572, "grad_norm": 0.055823504261203916, "learning_rate": 0.0001399323713599198, "loss": 0.8416, "step": 109340 }, { "epoch": 1.9198019628153584, "grad_norm": 0.05636997761846393, "learning_rate": 0.00013992212533540447, "loss": 0.8448, "step": 109350 }, { "epoch": 1.9199775276953597, "grad_norm": 0.061167589081324934, "learning_rate": 0.00013991187881773838, "loss": 0.84, "step": 109360 }, { "epoch": 1.9201530925753612, "grad_norm": 0.07063325495232811, "learning_rate": 0.0001399016318070513, "loss": 0.8561, "step": 109370 }, { "epoch": 1.9203286574553626, "grad_norm": 0.07356901460173183, "learning_rate": 0.0001398913843034731, "loss": 0.8441, "step": 109380 }, { "epoch": 1.920504222335364, "grad_norm": 0.08150419853141022, "learning_rate": 0.00013988113630713361, "loss": 0.8429, "step": 109390 }, { "epoch": 1.9206797872153656, "grad_norm": 0.043364890412105575, "learning_rate": 0.00013987088781816266, "loss": 0.8426, "step": 109400 }, { "epoch": 1.9208553520953668, "grad_norm": 0.0545539331577491, "learning_rate": 0.00013986063883669015, "loss": 0.8459, "step": 109410 }, { "epoch": 1.921030916975368, "grad_norm": 0.05165759900007163, "learning_rate": 0.00013985038936284588, "loss": 0.8481, "step": 109420 }, { "epoch": 1.9212064818553696, "grad_norm": 0.054786925532448974, "learning_rate": 0.0001398401393967597, "loss": 0.8539, "step": 109430 }, { "epoch": 1.921382046735371, "grad_norm": 0.057168807941532236, "learning_rate": 0.00013982988893856152, "loss": 0.8514, "step": 109440 }, { "epoch": 1.9215576116153725, "grad_norm": 0.05167601235074014, "learning_rate": 0.0001398196379883812, "loss": 0.8392, "step": 109450 }, { "epoch": 1.921733176495374, "grad_norm": 0.05557451034383708, "learning_rate": 0.00013980938654634866, "loss": 0.846, "step": 109460 }, { "epoch": 1.9219087413753753, "grad_norm": 0.06526894633094976, "learning_rate": 0.00013979913461259372, "loss": 0.8468, "step": 109470 }, { "epoch": 1.9220843062553765, "grad_norm": 0.06215766399190211, "learning_rate": 0.00013978888218724636, "loss": 0.8404, "step": 109480 }, { "epoch": 1.922259871135378, "grad_norm": 0.061054069116031874, "learning_rate": 0.00013977862927043637, "loss": 0.847, "step": 109490 }, { "epoch": 1.9224354360153795, "grad_norm": 0.05264543287121562, "learning_rate": 0.00013976837586229373, "loss": 0.8493, "step": 109500 }, { "epoch": 1.922611000895381, "grad_norm": 0.07293935715336002, "learning_rate": 0.00013975812196294834, "loss": 0.8521, "step": 109510 }, { "epoch": 1.9227865657753824, "grad_norm": 0.048246841580725945, "learning_rate": 0.00013974786757253014, "loss": 0.8477, "step": 109520 }, { "epoch": 1.9229621306553837, "grad_norm": 0.05283540645870476, "learning_rate": 0.000139737612691169, "loss": 0.8495, "step": 109530 }, { "epoch": 1.923137695535385, "grad_norm": 0.05730390611405973, "learning_rate": 0.00013972735731899492, "loss": 0.8456, "step": 109540 }, { "epoch": 1.9233132604153864, "grad_norm": 0.0538844349981491, "learning_rate": 0.0001397171014561378, "loss": 0.8485, "step": 109550 }, { "epoch": 1.923488825295388, "grad_norm": 0.05032730956745802, "learning_rate": 0.00013970684510272754, "loss": 0.8426, "step": 109560 }, { "epoch": 1.9236643901753894, "grad_norm": 0.06343390128446444, "learning_rate": 0.00013969658825889412, "loss": 0.8489, "step": 109570 }, { "epoch": 1.9238399550553908, "grad_norm": 0.06483015831361037, "learning_rate": 0.0001396863309247676, "loss": 0.854, "step": 109580 }, { "epoch": 1.924015519935392, "grad_norm": 0.0555310418351682, "learning_rate": 0.0001396760731004778, "loss": 0.8628, "step": 109590 }, { "epoch": 1.9241910848153936, "grad_norm": 0.046000328435455914, "learning_rate": 0.00013966581478615472, "loss": 0.8427, "step": 109600 }, { "epoch": 1.9243666496953948, "grad_norm": 0.046976298806344734, "learning_rate": 0.0001396555559819284, "loss": 0.8433, "step": 109610 }, { "epoch": 1.9245422145753963, "grad_norm": 0.05438040493530965, "learning_rate": 0.00013964529668792874, "loss": 0.8472, "step": 109620 }, { "epoch": 1.9247177794553978, "grad_norm": 0.05905506738325855, "learning_rate": 0.00013963503690428582, "loss": 0.8521, "step": 109630 }, { "epoch": 1.9248933443353993, "grad_norm": 0.05055191510228411, "learning_rate": 0.00013962477663112953, "loss": 0.8469, "step": 109640 }, { "epoch": 1.9250689092154005, "grad_norm": 0.07253488823013744, "learning_rate": 0.0001396145158685899, "loss": 0.8458, "step": 109650 }, { "epoch": 1.925244474095402, "grad_norm": 0.04859578234710109, "learning_rate": 0.000139604254616797, "loss": 0.8429, "step": 109660 }, { "epoch": 1.9254200389754033, "grad_norm": 0.08736948800487222, "learning_rate": 0.00013959399287588077, "loss": 0.8439, "step": 109670 }, { "epoch": 1.9255956038554047, "grad_norm": 0.05348680421067987, "learning_rate": 0.00013958373064597125, "loss": 0.8466, "step": 109680 }, { "epoch": 1.9257711687354062, "grad_norm": 0.06695755715051982, "learning_rate": 0.00013957346792719847, "loss": 0.8514, "step": 109690 }, { "epoch": 1.9259467336154077, "grad_norm": 0.051682733815016424, "learning_rate": 0.0001395632047196925, "loss": 0.8461, "step": 109700 }, { "epoch": 1.926122298495409, "grad_norm": 0.06197047889673589, "learning_rate": 0.00013955294102358328, "loss": 0.8487, "step": 109710 }, { "epoch": 1.9262978633754104, "grad_norm": 0.06252049921222398, "learning_rate": 0.00013954267683900093, "loss": 0.8466, "step": 109720 }, { "epoch": 1.9264734282554117, "grad_norm": 0.053533256831242326, "learning_rate": 0.00013953241216607549, "loss": 0.8482, "step": 109730 }, { "epoch": 1.9266489931354132, "grad_norm": 0.048783418057774336, "learning_rate": 0.00013952214700493697, "loss": 0.8423, "step": 109740 }, { "epoch": 1.9268245580154146, "grad_norm": 0.06079491353531496, "learning_rate": 0.0001395118813557155, "loss": 0.852, "step": 109750 }, { "epoch": 1.9270001228954161, "grad_norm": 0.07017594789607368, "learning_rate": 0.00013950161521854111, "loss": 0.8552, "step": 109760 }, { "epoch": 1.9271756877754174, "grad_norm": 0.06121378688487306, "learning_rate": 0.00013949134859354384, "loss": 0.8451, "step": 109770 }, { "epoch": 1.9273512526554188, "grad_norm": 0.07295320066324712, "learning_rate": 0.00013948108148085386, "loss": 0.8535, "step": 109780 }, { "epoch": 1.92752681753542, "grad_norm": 0.04691421691606526, "learning_rate": 0.00013947081388060117, "loss": 0.8485, "step": 109790 }, { "epoch": 1.9277023824154216, "grad_norm": 0.0490133396146313, "learning_rate": 0.0001394605457929159, "loss": 0.8381, "step": 109800 }, { "epoch": 1.927877947295423, "grad_norm": 0.05565900301449928, "learning_rate": 0.00013945027721792817, "loss": 0.8419, "step": 109810 }, { "epoch": 1.9280535121754245, "grad_norm": 0.04745193580482123, "learning_rate": 0.00013944000815576803, "loss": 0.8431, "step": 109820 }, { "epoch": 1.9282290770554258, "grad_norm": 0.0504740322233134, "learning_rate": 0.0001394297386065656, "loss": 0.8507, "step": 109830 }, { "epoch": 1.9284046419354273, "grad_norm": 0.05834547578309637, "learning_rate": 0.00013941946857045102, "loss": 0.8493, "step": 109840 }, { "epoch": 1.9285802068154285, "grad_norm": 0.05564761016223016, "learning_rate": 0.00013940919804755446, "loss": 0.8463, "step": 109850 }, { "epoch": 1.92875577169543, "grad_norm": 0.053876190800339716, "learning_rate": 0.00013939892703800595, "loss": 0.8488, "step": 109860 }, { "epoch": 1.9289313365754315, "grad_norm": 0.05492983571158568, "learning_rate": 0.00013938865554193573, "loss": 0.838, "step": 109870 }, { "epoch": 1.929106901455433, "grad_norm": 0.06891115450340746, "learning_rate": 0.00013937838355947385, "loss": 0.8442, "step": 109880 }, { "epoch": 1.9292824663354342, "grad_norm": 0.05210652973139689, "learning_rate": 0.0001393681110907505, "loss": 0.8494, "step": 109890 }, { "epoch": 1.9294580312154357, "grad_norm": 0.05603195976531089, "learning_rate": 0.0001393578381358958, "loss": 0.8291, "step": 109900 }, { "epoch": 1.929633596095437, "grad_norm": 0.041173388289455964, "learning_rate": 0.00013934756469503995, "loss": 0.8492, "step": 109910 }, { "epoch": 1.9298091609754384, "grad_norm": 0.07105663005916103, "learning_rate": 0.00013933729076831314, "loss": 0.8424, "step": 109920 }, { "epoch": 1.92998472585544, "grad_norm": 0.07890869179027006, "learning_rate": 0.0001393270163558455, "loss": 0.8464, "step": 109930 }, { "epoch": 1.9301602907354414, "grad_norm": 0.05041698930204177, "learning_rate": 0.00013931674145776718, "loss": 0.8468, "step": 109940 }, { "epoch": 1.9303358556154429, "grad_norm": 0.056166543079045235, "learning_rate": 0.00013930646607420845, "loss": 0.843, "step": 109950 }, { "epoch": 1.930511420495444, "grad_norm": 0.04607659360209689, "learning_rate": 0.00013929619020529942, "loss": 0.8436, "step": 109960 }, { "epoch": 1.9306869853754454, "grad_norm": 0.059381698668602656, "learning_rate": 0.00013928591385117036, "loss": 0.8441, "step": 109970 }, { "epoch": 1.9308625502554468, "grad_norm": 0.0778028469933006, "learning_rate": 0.00013927563701195142, "loss": 0.8404, "step": 109980 }, { "epoch": 1.9310381151354483, "grad_norm": 0.05764913897474398, "learning_rate": 0.0001392653596877728, "loss": 0.8387, "step": 109990 }, { "epoch": 1.9312136800154498, "grad_norm": 0.08215842570877105, "learning_rate": 0.00013925508187876475, "loss": 0.8504, "step": 110000 }, { "epoch": 1.9313892448954513, "grad_norm": 0.0644207058567212, "learning_rate": 0.00013924480358505747, "loss": 0.8584, "step": 110010 }, { "epoch": 1.9315648097754525, "grad_norm": 0.055364481097454454, "learning_rate": 0.00013923452480678122, "loss": 0.8479, "step": 110020 }, { "epoch": 1.9317403746554538, "grad_norm": 0.05851137704356893, "learning_rate": 0.0001392242455440662, "loss": 0.8507, "step": 110030 }, { "epoch": 1.9319159395354553, "grad_norm": 0.059985345578728415, "learning_rate": 0.00013921396579704267, "loss": 0.8499, "step": 110040 }, { "epoch": 1.9320915044154567, "grad_norm": 0.07607149495968114, "learning_rate": 0.00013920368556584087, "loss": 0.8561, "step": 110050 }, { "epoch": 1.9322670692954582, "grad_norm": 0.05051007656277145, "learning_rate": 0.00013919340485059106, "loss": 0.8475, "step": 110060 }, { "epoch": 1.9324426341754597, "grad_norm": 0.056546594192532225, "learning_rate": 0.00013918312365142346, "loss": 0.8448, "step": 110070 }, { "epoch": 1.932618199055461, "grad_norm": 0.050334877426547006, "learning_rate": 0.00013917284196846838, "loss": 0.8554, "step": 110080 }, { "epoch": 1.9327937639354622, "grad_norm": 0.0634736074918317, "learning_rate": 0.0001391625598018561, "loss": 0.838, "step": 110090 }, { "epoch": 1.9329693288154637, "grad_norm": 0.05483337694115247, "learning_rate": 0.00013915227715171686, "loss": 0.842, "step": 110100 }, { "epoch": 1.9331448936954652, "grad_norm": 0.05097409870360858, "learning_rate": 0.00013914199401818093, "loss": 0.8434, "step": 110110 }, { "epoch": 1.9333204585754666, "grad_norm": 0.05815932542587577, "learning_rate": 0.0001391317104013787, "loss": 0.8434, "step": 110120 }, { "epoch": 1.9334960234554681, "grad_norm": 0.052847771976825636, "learning_rate": 0.0001391214263014403, "loss": 0.8421, "step": 110130 }, { "epoch": 1.9336715883354694, "grad_norm": 0.08553797004293283, "learning_rate": 0.00013911114171849618, "loss": 0.8447, "step": 110140 }, { "epoch": 1.9338471532154706, "grad_norm": 0.04233605013806068, "learning_rate": 0.00013910085665267658, "loss": 0.8487, "step": 110150 }, { "epoch": 1.934022718095472, "grad_norm": 0.07155425528622072, "learning_rate": 0.0001390905711041118, "loss": 0.8485, "step": 110160 }, { "epoch": 1.9341982829754736, "grad_norm": 0.0732868573175537, "learning_rate": 0.0001390802850729322, "loss": 0.843, "step": 110170 }, { "epoch": 1.934373847855475, "grad_norm": 0.050632813856611315, "learning_rate": 0.0001390699985592681, "loss": 0.8479, "step": 110180 }, { "epoch": 1.9345494127354765, "grad_norm": 0.05967548786588748, "learning_rate": 0.0001390597115632498, "loss": 0.8506, "step": 110190 }, { "epoch": 1.9347249776154778, "grad_norm": 0.05182076271245335, "learning_rate": 0.00013904942408500767, "loss": 0.852, "step": 110200 }, { "epoch": 1.934900542495479, "grad_norm": 0.056142480256224606, "learning_rate": 0.00013903913612467206, "loss": 0.8376, "step": 110210 }, { "epoch": 1.9350761073754805, "grad_norm": 0.04969720547337604, "learning_rate": 0.0001390288476823733, "loss": 0.8516, "step": 110220 }, { "epoch": 1.935251672255482, "grad_norm": 0.06205925030680653, "learning_rate": 0.00013901855875824175, "loss": 0.8398, "step": 110230 }, { "epoch": 1.9354272371354835, "grad_norm": 0.054358827800129836, "learning_rate": 0.0001390082693524078, "loss": 0.848, "step": 110240 }, { "epoch": 1.935602802015485, "grad_norm": 0.06906513094921642, "learning_rate": 0.00013899797946500172, "loss": 0.8418, "step": 110250 }, { "epoch": 1.9357783668954862, "grad_norm": 0.08708874220530889, "learning_rate": 0.000138987689096154, "loss": 0.8382, "step": 110260 }, { "epoch": 1.9359539317754875, "grad_norm": 0.07483690366785595, "learning_rate": 0.00013897739824599497, "loss": 0.838, "step": 110270 }, { "epoch": 1.936129496655489, "grad_norm": 0.055521690406955794, "learning_rate": 0.00013896710691465507, "loss": 0.8472, "step": 110280 }, { "epoch": 1.9363050615354904, "grad_norm": 0.09102722329746905, "learning_rate": 0.0001389568151022646, "loss": 0.8434, "step": 110290 }, { "epoch": 1.936480626415492, "grad_norm": 0.04957268099073826, "learning_rate": 0.00013894652280895401, "loss": 0.8473, "step": 110300 }, { "epoch": 1.9366561912954934, "grad_norm": 0.059473850385543196, "learning_rate": 0.00013893623003485373, "loss": 0.8425, "step": 110310 }, { "epoch": 1.9368317561754946, "grad_norm": 0.06053463426270177, "learning_rate": 0.00013892593678009415, "loss": 0.8482, "step": 110320 }, { "epoch": 1.9370073210554961, "grad_norm": 0.04568089749588158, "learning_rate": 0.00013891564304480567, "loss": 0.8465, "step": 110330 }, { "epoch": 1.9371828859354974, "grad_norm": 0.06143853888972699, "learning_rate": 0.0001389053488291187, "loss": 0.839, "step": 110340 }, { "epoch": 1.9373584508154988, "grad_norm": 0.06778017752627759, "learning_rate": 0.0001388950541331637, "loss": 0.8546, "step": 110350 }, { "epoch": 1.9375340156955003, "grad_norm": 0.06541387657628221, "learning_rate": 0.0001388847589570711, "loss": 0.8541, "step": 110360 }, { "epoch": 1.9377095805755018, "grad_norm": 0.0492002750964673, "learning_rate": 0.00013887446330097136, "loss": 0.8482, "step": 110370 }, { "epoch": 1.937885145455503, "grad_norm": 0.06129444877002491, "learning_rate": 0.00013886416716499492, "loss": 0.8435, "step": 110380 }, { "epoch": 1.9380607103355045, "grad_norm": 0.05518731498555092, "learning_rate": 0.00013885387054927224, "loss": 0.8479, "step": 110390 }, { "epoch": 1.9382362752155058, "grad_norm": 0.07131631111995335, "learning_rate": 0.00013884357345393374, "loss": 0.8432, "step": 110400 }, { "epoch": 1.9384118400955073, "grad_norm": 0.0801187176384715, "learning_rate": 0.0001388332758791099, "loss": 0.843, "step": 110410 }, { "epoch": 1.9385874049755087, "grad_norm": 0.06456239092853251, "learning_rate": 0.00013882297782493122, "loss": 0.8451, "step": 110420 }, { "epoch": 1.9387629698555102, "grad_norm": 0.047566465989190884, "learning_rate": 0.00013881267929152818, "loss": 0.8407, "step": 110430 }, { "epoch": 1.9389385347355115, "grad_norm": 0.06723719921566795, "learning_rate": 0.00013880238027903124, "loss": 0.8448, "step": 110440 }, { "epoch": 1.939114099615513, "grad_norm": 0.048114590133407346, "learning_rate": 0.00013879208078757086, "loss": 0.8517, "step": 110450 }, { "epoch": 1.9392896644955142, "grad_norm": 0.06503803358546217, "learning_rate": 0.0001387817808172776, "loss": 0.8456, "step": 110460 }, { "epoch": 1.9394652293755157, "grad_norm": 0.05882121869710032, "learning_rate": 0.00013877148036828193, "loss": 0.8461, "step": 110470 }, { "epoch": 1.9396407942555172, "grad_norm": 0.06594265824971789, "learning_rate": 0.00013876117944071437, "loss": 0.8474, "step": 110480 }, { "epoch": 1.9398163591355186, "grad_norm": 0.04677869249307817, "learning_rate": 0.00013875087803470545, "loss": 0.8424, "step": 110490 }, { "epoch": 1.93999192401552, "grad_norm": 0.051964072768149336, "learning_rate": 0.00013874057615038564, "loss": 0.8484, "step": 110500 }, { "epoch": 1.9401674888955214, "grad_norm": 0.046364603562904644, "learning_rate": 0.0001387302737878855, "loss": 0.845, "step": 110510 }, { "epoch": 1.9403430537755226, "grad_norm": 0.05873337809483829, "learning_rate": 0.00013871997094733556, "loss": 0.8433, "step": 110520 }, { "epoch": 1.940518618655524, "grad_norm": 0.0489641474617125, "learning_rate": 0.00013870966762886638, "loss": 0.8542, "step": 110530 }, { "epoch": 1.9406941835355256, "grad_norm": 0.07046771358684527, "learning_rate": 0.00013869936383260843, "loss": 0.8434, "step": 110540 }, { "epoch": 1.940869748415527, "grad_norm": 0.04696090753137456, "learning_rate": 0.0001386890595586924, "loss": 0.8446, "step": 110550 }, { "epoch": 1.9410453132955283, "grad_norm": 0.05853005643756745, "learning_rate": 0.00013867875480724873, "loss": 0.855, "step": 110560 }, { "epoch": 1.9412208781755298, "grad_norm": 0.043471358290616144, "learning_rate": 0.00013866844957840797, "loss": 0.8496, "step": 110570 }, { "epoch": 1.941396443055531, "grad_norm": 0.04515198528544044, "learning_rate": 0.0001386581438723008, "loss": 0.8478, "step": 110580 }, { "epoch": 1.9415720079355325, "grad_norm": 0.05324858016657406, "learning_rate": 0.00013864783768905768, "loss": 0.8491, "step": 110590 }, { "epoch": 1.941747572815534, "grad_norm": 0.08524420360595526, "learning_rate": 0.00013863753102880924, "loss": 0.8472, "step": 110600 }, { "epoch": 1.9419231376955355, "grad_norm": 0.05452026837492923, "learning_rate": 0.0001386272238916861, "loss": 0.8501, "step": 110610 }, { "epoch": 1.9420987025755367, "grad_norm": 0.09513483607386232, "learning_rate": 0.00013861691627781883, "loss": 0.8473, "step": 110620 }, { "epoch": 1.9422742674555382, "grad_norm": 0.054360144956843715, "learning_rate": 0.000138606608187338, "loss": 0.8425, "step": 110630 }, { "epoch": 1.9424498323355395, "grad_norm": 0.05073077666801863, "learning_rate": 0.00013859629962037422, "loss": 0.8501, "step": 110640 }, { "epoch": 1.942625397215541, "grad_norm": 0.06391493835532694, "learning_rate": 0.00013858599057705814, "loss": 0.8473, "step": 110650 }, { "epoch": 1.9428009620955424, "grad_norm": 0.049415378522348924, "learning_rate": 0.00013857568105752035, "loss": 0.8552, "step": 110660 }, { "epoch": 1.942976526975544, "grad_norm": 0.05512735365108504, "learning_rate": 0.0001385653710618915, "loss": 0.847, "step": 110670 }, { "epoch": 1.9431520918555454, "grad_norm": 0.05396216139192124, "learning_rate": 0.00013855506059030218, "loss": 0.846, "step": 110680 }, { "epoch": 1.9433276567355466, "grad_norm": 0.11540291208205888, "learning_rate": 0.00013854474964288303, "loss": 0.8464, "step": 110690 }, { "epoch": 1.943503221615548, "grad_norm": 0.05282965074713177, "learning_rate": 0.00013853443821976473, "loss": 0.8412, "step": 110700 }, { "epoch": 1.9436787864955494, "grad_norm": 0.04938275950187081, "learning_rate": 0.00013852412632107787, "loss": 0.8401, "step": 110710 }, { "epoch": 1.9438543513755508, "grad_norm": 0.1053093634857138, "learning_rate": 0.0001385138139469532, "loss": 0.8426, "step": 110720 }, { "epoch": 1.9440299162555523, "grad_norm": 0.06211397431521468, "learning_rate": 0.00013850350109752128, "loss": 0.8465, "step": 110730 }, { "epoch": 1.9442054811355538, "grad_norm": 0.06471545038863896, "learning_rate": 0.0001384931877729128, "loss": 0.85, "step": 110740 }, { "epoch": 1.944381046015555, "grad_norm": 0.057918616330289635, "learning_rate": 0.00013848287397325848, "loss": 0.846, "step": 110750 }, { "epoch": 1.9445566108955563, "grad_norm": 0.05946825186977022, "learning_rate": 0.00013847255969868892, "loss": 0.842, "step": 110760 }, { "epoch": 1.9447321757755578, "grad_norm": 0.046267994122825555, "learning_rate": 0.00013846224494933488, "loss": 0.8498, "step": 110770 }, { "epoch": 1.9449077406555593, "grad_norm": 0.04900158334707771, "learning_rate": 0.00013845192972532702, "loss": 0.8518, "step": 110780 }, { "epoch": 1.9450833055355607, "grad_norm": 0.054208597130305104, "learning_rate": 0.00013844161402679604, "loss": 0.84, "step": 110790 }, { "epoch": 1.9452588704155622, "grad_norm": 0.05314854600360138, "learning_rate": 0.00013843129785387262, "loss": 0.8471, "step": 110800 }, { "epoch": 1.9454344352955635, "grad_norm": 0.06662735167191434, "learning_rate": 0.00013842098120668745, "loss": 0.8432, "step": 110810 }, { "epoch": 1.9456100001755647, "grad_norm": 0.06542019234872798, "learning_rate": 0.0001384106640853713, "loss": 0.836, "step": 110820 }, { "epoch": 1.9457855650555662, "grad_norm": 0.07337907760309358, "learning_rate": 0.0001384003464900549, "loss": 0.8466, "step": 110830 }, { "epoch": 1.9459611299355677, "grad_norm": 0.05767330555197751, "learning_rate": 0.0001383900284208689, "loss": 0.8411, "step": 110840 }, { "epoch": 1.9461366948155692, "grad_norm": 0.09528948033454833, "learning_rate": 0.0001383797098779441, "loss": 0.8459, "step": 110850 }, { "epoch": 1.9463122596955706, "grad_norm": 0.05531033053246291, "learning_rate": 0.0001383693908614112, "loss": 0.8398, "step": 110860 }, { "epoch": 1.946487824575572, "grad_norm": 0.04316658181336026, "learning_rate": 0.00013835907137140093, "loss": 0.8459, "step": 110870 }, { "epoch": 1.9466633894555732, "grad_norm": 0.06599577482938304, "learning_rate": 0.0001383487514080441, "loss": 0.8426, "step": 110880 }, { "epoch": 1.9468389543355746, "grad_norm": 0.043773838352917614, "learning_rate": 0.00013833843097147146, "loss": 0.8492, "step": 110890 }, { "epoch": 1.9470145192155761, "grad_norm": 0.0613839366703142, "learning_rate": 0.00013832811006181367, "loss": 0.8498, "step": 110900 }, { "epoch": 1.9471900840955776, "grad_norm": 0.08432225639555833, "learning_rate": 0.0001383177886792016, "loss": 0.8521, "step": 110910 }, { "epoch": 1.947365648975579, "grad_norm": 0.047474200986894866, "learning_rate": 0.00013830746682376603, "loss": 0.8549, "step": 110920 }, { "epoch": 1.9475412138555803, "grad_norm": 0.054698344460265266, "learning_rate": 0.00013829714449563766, "loss": 0.8457, "step": 110930 }, { "epoch": 1.9477167787355816, "grad_norm": 0.06654912234448594, "learning_rate": 0.00013828682169494735, "loss": 0.8449, "step": 110940 }, { "epoch": 1.947892343615583, "grad_norm": 0.049443629112518046, "learning_rate": 0.00013827649842182587, "loss": 0.847, "step": 110950 }, { "epoch": 1.9480679084955845, "grad_norm": 0.05911688569446259, "learning_rate": 0.000138266174676404, "loss": 0.8435, "step": 110960 }, { "epoch": 1.948243473375586, "grad_norm": 0.06707515792637, "learning_rate": 0.00013825585045881253, "loss": 0.8503, "step": 110970 }, { "epoch": 1.9484190382555875, "grad_norm": 0.06218555251206529, "learning_rate": 0.00013824552576918232, "loss": 0.8511, "step": 110980 }, { "epoch": 1.9485946031355887, "grad_norm": 0.07272450904394485, "learning_rate": 0.00013823520060764414, "loss": 0.8472, "step": 110990 }, { "epoch": 1.94877016801559, "grad_norm": 0.04967469027415058, "learning_rate": 0.00013822487497432885, "loss": 0.8487, "step": 111000 }, { "epoch": 1.9489457328955915, "grad_norm": 0.06821883623636296, "learning_rate": 0.00013821454886936728, "loss": 0.8553, "step": 111010 }, { "epoch": 1.949121297775593, "grad_norm": 0.05175521027511555, "learning_rate": 0.0001382042222928902, "loss": 0.8403, "step": 111020 }, { "epoch": 1.9492968626555944, "grad_norm": 0.05170647933957972, "learning_rate": 0.00013819389524502852, "loss": 0.8447, "step": 111030 }, { "epoch": 1.949472427535596, "grad_norm": 0.054873140532061465, "learning_rate": 0.00013818356772591305, "loss": 0.8383, "step": 111040 }, { "epoch": 1.9496479924155972, "grad_norm": 0.06431816070658887, "learning_rate": 0.00013817323973567465, "loss": 0.8493, "step": 111050 }, { "epoch": 1.9498235572955986, "grad_norm": 0.04444172519907014, "learning_rate": 0.00013816291127444422, "loss": 0.84, "step": 111060 }, { "epoch": 1.9499991221756, "grad_norm": 0.051309588789921194, "learning_rate": 0.00013815258234235254, "loss": 0.8505, "step": 111070 }, { "epoch": 1.9501746870556014, "grad_norm": 0.06573534223375747, "learning_rate": 0.00013814225293953052, "loss": 0.842, "step": 111080 }, { "epoch": 1.9503502519356029, "grad_norm": 0.0675857645123154, "learning_rate": 0.00013813192306610903, "loss": 0.8481, "step": 111090 }, { "epoch": 1.9505258168156043, "grad_norm": 0.05682141282059442, "learning_rate": 0.00013812159272221896, "loss": 0.8489, "step": 111100 }, { "epoch": 1.9507013816956056, "grad_norm": 0.06217729963056017, "learning_rate": 0.0001381112619079912, "loss": 0.8444, "step": 111110 }, { "epoch": 1.950876946575607, "grad_norm": 0.04751208094930493, "learning_rate": 0.00013810093062355667, "loss": 0.837, "step": 111120 }, { "epoch": 1.9510525114556083, "grad_norm": 0.05091998410470348, "learning_rate": 0.00013809059886904623, "loss": 0.8456, "step": 111130 }, { "epoch": 1.9512280763356098, "grad_norm": 0.05798100042864082, "learning_rate": 0.00013808026664459075, "loss": 0.8569, "step": 111140 }, { "epoch": 1.9514036412156113, "grad_norm": 0.04211340649263376, "learning_rate": 0.0001380699339503212, "loss": 0.8479, "step": 111150 }, { "epoch": 1.9515792060956128, "grad_norm": 0.0543682623445383, "learning_rate": 0.00013805960078636852, "loss": 0.843, "step": 111160 }, { "epoch": 1.951754770975614, "grad_norm": 0.10325494897322293, "learning_rate": 0.00013804926715286358, "loss": 0.8469, "step": 111170 }, { "epoch": 1.9519303358556155, "grad_norm": 0.06659514440094427, "learning_rate": 0.00013803893304993733, "loss": 0.8458, "step": 111180 }, { "epoch": 1.9521059007356167, "grad_norm": 0.04266441328691606, "learning_rate": 0.00013802859847772067, "loss": 0.8388, "step": 111190 }, { "epoch": 1.9522814656156182, "grad_norm": 0.04988117664058472, "learning_rate": 0.00013801826343634458, "loss": 0.8477, "step": 111200 }, { "epoch": 1.9524570304956197, "grad_norm": 0.06384853441538592, "learning_rate": 0.00013800792792594005, "loss": 0.84, "step": 111210 }, { "epoch": 1.9526325953756212, "grad_norm": 0.047131634168125006, "learning_rate": 0.0001379975919466379, "loss": 0.8482, "step": 111220 }, { "epoch": 1.9528081602556224, "grad_norm": 0.03993722350107053, "learning_rate": 0.00013798725549856925, "loss": 0.8446, "step": 111230 }, { "epoch": 1.952983725135624, "grad_norm": 0.056306085263318775, "learning_rate": 0.0001379769185818649, "loss": 0.8449, "step": 111240 }, { "epoch": 1.9531592900156252, "grad_norm": 0.06439511924802489, "learning_rate": 0.00013796658119665593, "loss": 0.8479, "step": 111250 }, { "epoch": 1.9533348548956266, "grad_norm": 0.07884682817670598, "learning_rate": 0.00013795624334307334, "loss": 0.8504, "step": 111260 }, { "epoch": 1.9535104197756281, "grad_norm": 0.05548761980499351, "learning_rate": 0.000137945905021248, "loss": 0.8423, "step": 111270 }, { "epoch": 1.9536859846556296, "grad_norm": 0.06971279514820031, "learning_rate": 0.000137935566231311, "loss": 0.8469, "step": 111280 }, { "epoch": 1.9538615495356308, "grad_norm": 0.05832756348475977, "learning_rate": 0.00013792522697339332, "loss": 0.8483, "step": 111290 }, { "epoch": 1.9540371144156323, "grad_norm": 0.054820127882666984, "learning_rate": 0.00013791488724762592, "loss": 0.8379, "step": 111300 }, { "epoch": 1.9542126792956336, "grad_norm": 0.05456210953624253, "learning_rate": 0.00013790454705413978, "loss": 0.8491, "step": 111310 }, { "epoch": 1.954388244175635, "grad_norm": 0.059374741276292546, "learning_rate": 0.00013789420639306597, "loss": 0.8498, "step": 111320 }, { "epoch": 1.9545638090556365, "grad_norm": 0.05086896019944371, "learning_rate": 0.00013788386526453553, "loss": 0.8389, "step": 111330 }, { "epoch": 1.954739373935638, "grad_norm": 0.05081394123106769, "learning_rate": 0.00013787352366867944, "loss": 0.8522, "step": 111340 }, { "epoch": 1.9549149388156393, "grad_norm": 0.05744919775767495, "learning_rate": 0.00013786318160562873, "loss": 0.8493, "step": 111350 }, { "epoch": 1.9550905036956407, "grad_norm": 0.06616854035134763, "learning_rate": 0.00013785283907551444, "loss": 0.8456, "step": 111360 }, { "epoch": 1.955266068575642, "grad_norm": 0.047916096529324805, "learning_rate": 0.0001378424960784676, "loss": 0.8524, "step": 111370 }, { "epoch": 1.9554416334556435, "grad_norm": 0.06033553490430679, "learning_rate": 0.00013783215261461932, "loss": 0.8483, "step": 111380 }, { "epoch": 1.955617198335645, "grad_norm": 0.05790408478531029, "learning_rate": 0.00013782180868410058, "loss": 0.8436, "step": 111390 }, { "epoch": 1.9557927632156464, "grad_norm": 0.0558236560889582, "learning_rate": 0.00013781146428704249, "loss": 0.844, "step": 111400 }, { "epoch": 1.955968328095648, "grad_norm": 0.0574266909693405, "learning_rate": 0.00013780111942357604, "loss": 0.846, "step": 111410 }, { "epoch": 1.9561438929756492, "grad_norm": 0.06630645954575247, "learning_rate": 0.00013779077409383238, "loss": 0.8377, "step": 111420 }, { "epoch": 1.9563194578556504, "grad_norm": 0.05294057787728783, "learning_rate": 0.00013778042829794258, "loss": 0.855, "step": 111430 }, { "epoch": 1.956495022735652, "grad_norm": 0.05209037192016049, "learning_rate": 0.00013777008203603766, "loss": 0.8487, "step": 111440 }, { "epoch": 1.9566705876156534, "grad_norm": 0.04902073569170586, "learning_rate": 0.00013775973530824883, "loss": 0.8392, "step": 111450 }, { "epoch": 1.9568461524956549, "grad_norm": 0.04148426298274345, "learning_rate": 0.00013774938811470707, "loss": 0.8483, "step": 111460 }, { "epoch": 1.9570217173756563, "grad_norm": 0.06196535876573423, "learning_rate": 0.00013773904045554351, "loss": 0.843, "step": 111470 }, { "epoch": 1.9571972822556576, "grad_norm": 0.04889457941551729, "learning_rate": 0.00013772869233088923, "loss": 0.8425, "step": 111480 }, { "epoch": 1.9573728471356588, "grad_norm": 0.07889997696083662, "learning_rate": 0.00013771834374087542, "loss": 0.8411, "step": 111490 }, { "epoch": 1.9575484120156603, "grad_norm": 0.04564574881801453, "learning_rate": 0.0001377079946856332, "loss": 0.851, "step": 111500 }, { "epoch": 1.9577239768956618, "grad_norm": 0.049490382507343694, "learning_rate": 0.00013769764516529358, "loss": 0.849, "step": 111510 }, { "epoch": 1.9578995417756633, "grad_norm": 0.054334320191869845, "learning_rate": 0.0001376872951799878, "loss": 0.8512, "step": 111520 }, { "epoch": 1.9580751066556648, "grad_norm": 0.05710555754741688, "learning_rate": 0.00013767694472984694, "loss": 0.855, "step": 111530 }, { "epoch": 1.958250671535666, "grad_norm": 0.06884234692258692, "learning_rate": 0.00013766659381500215, "loss": 0.845, "step": 111540 }, { "epoch": 1.9584262364156673, "grad_norm": 0.05764003128176208, "learning_rate": 0.00013765624243558463, "loss": 0.8439, "step": 111550 }, { "epoch": 1.9586018012956687, "grad_norm": 0.06915201123688364, "learning_rate": 0.00013764589059172546, "loss": 0.8507, "step": 111560 }, { "epoch": 1.9587773661756702, "grad_norm": 0.055482567730472075, "learning_rate": 0.00013763553828355588, "loss": 0.8486, "step": 111570 }, { "epoch": 1.9589529310556717, "grad_norm": 0.05130169807393473, "learning_rate": 0.00013762518551120695, "loss": 0.8554, "step": 111580 }, { "epoch": 1.9591284959356732, "grad_norm": 0.05859079667471612, "learning_rate": 0.00013761483227480993, "loss": 0.8429, "step": 111590 }, { "epoch": 1.9593040608156744, "grad_norm": 0.04607285720226849, "learning_rate": 0.00013760447857449597, "loss": 0.8585, "step": 111600 }, { "epoch": 1.9594796256956757, "grad_norm": 0.08667571554579434, "learning_rate": 0.0001375941244103962, "loss": 0.8401, "step": 111610 }, { "epoch": 1.9596551905756772, "grad_norm": 0.0837991878738869, "learning_rate": 0.00013758376978264194, "loss": 0.8596, "step": 111620 }, { "epoch": 1.9598307554556786, "grad_norm": 0.05735243482677296, "learning_rate": 0.0001375734146913643, "loss": 0.8373, "step": 111630 }, { "epoch": 1.9600063203356801, "grad_norm": 0.04761647211153599, "learning_rate": 0.00013756305913669445, "loss": 0.852, "step": 111640 }, { "epoch": 1.9601818852156816, "grad_norm": 0.06219658495593204, "learning_rate": 0.00013755270311876364, "loss": 0.8334, "step": 111650 }, { "epoch": 1.9603574500956829, "grad_norm": 0.07376402181450545, "learning_rate": 0.00013754234663770307, "loss": 0.841, "step": 111660 }, { "epoch": 1.960533014975684, "grad_norm": 0.05190014774494503, "learning_rate": 0.00013753198969364397, "loss": 0.8539, "step": 111670 }, { "epoch": 1.9607085798556856, "grad_norm": 0.05315925522893024, "learning_rate": 0.00013752163228671754, "loss": 0.851, "step": 111680 }, { "epoch": 1.960884144735687, "grad_norm": 0.06666761720435306, "learning_rate": 0.0001375112744170551, "loss": 0.8547, "step": 111690 }, { "epoch": 1.9610597096156885, "grad_norm": 0.06553831956176707, "learning_rate": 0.00013750091608478773, "loss": 0.8474, "step": 111700 }, { "epoch": 1.96123527449569, "grad_norm": 0.05389466632573982, "learning_rate": 0.00013749055729004678, "loss": 0.8537, "step": 111710 }, { "epoch": 1.9614108393756913, "grad_norm": 0.04755621928665611, "learning_rate": 0.00013748019803296353, "loss": 0.8415, "step": 111720 }, { "epoch": 1.9615864042556925, "grad_norm": 0.06609845973689565, "learning_rate": 0.00013746983831366911, "loss": 0.8559, "step": 111730 }, { "epoch": 1.961761969135694, "grad_norm": 0.0639902368494352, "learning_rate": 0.00013745947813229492, "loss": 0.8481, "step": 111740 }, { "epoch": 1.9619375340156955, "grad_norm": 0.06090942230032175, "learning_rate": 0.0001374491174889721, "loss": 0.8515, "step": 111750 }, { "epoch": 1.962113098895697, "grad_norm": 0.05010329637736998, "learning_rate": 0.00013743875638383196, "loss": 0.8441, "step": 111760 }, { "epoch": 1.9622886637756984, "grad_norm": 0.06701389864253225, "learning_rate": 0.00013742839481700582, "loss": 0.8468, "step": 111770 }, { "epoch": 1.9624642286556997, "grad_norm": 0.05781902753120717, "learning_rate": 0.00013741803278862494, "loss": 0.843, "step": 111780 }, { "epoch": 1.9626397935357012, "grad_norm": 0.05085217351125225, "learning_rate": 0.0001374076702988206, "loss": 0.8446, "step": 111790 }, { "epoch": 1.9628153584157024, "grad_norm": 0.05772546283327184, "learning_rate": 0.00013739730734772413, "loss": 0.8403, "step": 111800 }, { "epoch": 1.962990923295704, "grad_norm": 0.06945430522377904, "learning_rate": 0.00013738694393546677, "loss": 0.8462, "step": 111810 }, { "epoch": 1.9631664881757054, "grad_norm": 0.05208059240649248, "learning_rate": 0.00013737658006217986, "loss": 0.8437, "step": 111820 }, { "epoch": 1.9633420530557069, "grad_norm": 0.09142016896988486, "learning_rate": 0.0001373662157279947, "loss": 0.8449, "step": 111830 }, { "epoch": 1.9635176179357081, "grad_norm": 0.061436105075631776, "learning_rate": 0.00013735585093304265, "loss": 0.8479, "step": 111840 }, { "epoch": 1.9636931828157096, "grad_norm": 0.05581460051234504, "learning_rate": 0.00013734548567745497, "loss": 0.838, "step": 111850 }, { "epoch": 1.9638687476957108, "grad_norm": 0.05685588091062141, "learning_rate": 0.00013733511996136304, "loss": 0.843, "step": 111860 }, { "epoch": 1.9640443125757123, "grad_norm": 0.07599634910143273, "learning_rate": 0.00013732475378489816, "loss": 0.8427, "step": 111870 }, { "epoch": 1.9642198774557138, "grad_norm": 0.04979434157805368, "learning_rate": 0.00013731438714819166, "loss": 0.8379, "step": 111880 }, { "epoch": 1.9643954423357153, "grad_norm": 0.05938079840790157, "learning_rate": 0.00013730402005137496, "loss": 0.8462, "step": 111890 }, { "epoch": 1.9645710072157165, "grad_norm": 0.07252541563748643, "learning_rate": 0.00013729365249457934, "loss": 0.8473, "step": 111900 }, { "epoch": 1.964746572095718, "grad_norm": 0.05589546804008972, "learning_rate": 0.00013728328447793622, "loss": 0.8553, "step": 111910 }, { "epoch": 1.9649221369757193, "grad_norm": 0.05801023838666999, "learning_rate": 0.00013727291600157694, "loss": 0.8467, "step": 111920 }, { "epoch": 1.9650977018557207, "grad_norm": 0.04524414239400619, "learning_rate": 0.00013726254706563279, "loss": 0.8533, "step": 111930 }, { "epoch": 1.9652732667357222, "grad_norm": 0.048569899294953256, "learning_rate": 0.0001372521776702353, "loss": 0.8536, "step": 111940 }, { "epoch": 1.9654488316157237, "grad_norm": 0.05675421069427916, "learning_rate": 0.00013724180781551576, "loss": 0.8451, "step": 111950 }, { "epoch": 1.965624396495725, "grad_norm": 0.04428275246229521, "learning_rate": 0.00013723143750160553, "loss": 0.8471, "step": 111960 }, { "epoch": 1.9657999613757264, "grad_norm": 0.06515163323687068, "learning_rate": 0.00013722106672863605, "loss": 0.8459, "step": 111970 }, { "epoch": 1.9659755262557277, "grad_norm": 0.04549247944855436, "learning_rate": 0.00013721069549673876, "loss": 0.8456, "step": 111980 }, { "epoch": 1.9661510911357292, "grad_norm": 0.050607779214252246, "learning_rate": 0.00013720032380604497, "loss": 0.8569, "step": 111990 }, { "epoch": 1.9663266560157306, "grad_norm": 0.06105337184140935, "learning_rate": 0.00013718995165668614, "loss": 0.8418, "step": 112000 }, { "epoch": 1.9665022208957321, "grad_norm": 0.04909532717851437, "learning_rate": 0.0001371795790487937, "loss": 0.8532, "step": 112010 }, { "epoch": 1.9666777857757334, "grad_norm": 0.07152726501538492, "learning_rate": 0.00013716920598249907, "loss": 0.8465, "step": 112020 }, { "epoch": 1.9668533506557349, "grad_norm": 0.05531608623245482, "learning_rate": 0.00013715883245793366, "loss": 0.8446, "step": 112030 }, { "epoch": 1.9670289155357361, "grad_norm": 0.08141642137353286, "learning_rate": 0.00013714845847522891, "loss": 0.8485, "step": 112040 }, { "epoch": 1.9672044804157376, "grad_norm": 0.05569285191434445, "learning_rate": 0.00013713808403451624, "loss": 0.8462, "step": 112050 }, { "epoch": 1.967380045295739, "grad_norm": 0.10253804804456446, "learning_rate": 0.00013712770913592716, "loss": 0.8525, "step": 112060 }, { "epoch": 1.9675556101757405, "grad_norm": 0.05665519156320135, "learning_rate": 0.00013711733377959304, "loss": 0.8515, "step": 112070 }, { "epoch": 1.9677311750557418, "grad_norm": 0.05156344241792496, "learning_rate": 0.00013710695796564546, "loss": 0.8495, "step": 112080 }, { "epoch": 1.9679067399357433, "grad_norm": 0.057602998794603656, "learning_rate": 0.0001370965816942157, "loss": 0.8498, "step": 112090 }, { "epoch": 1.9680823048157445, "grad_norm": 0.044158164936276216, "learning_rate": 0.00013708620496543538, "loss": 0.8474, "step": 112100 }, { "epoch": 1.968257869695746, "grad_norm": 0.05022559477354006, "learning_rate": 0.00013707582777943594, "loss": 0.8466, "step": 112110 }, { "epoch": 1.9684334345757475, "grad_norm": 0.056847037859170654, "learning_rate": 0.00013706545013634878, "loss": 0.8506, "step": 112120 }, { "epoch": 1.968608999455749, "grad_norm": 0.05358143267516846, "learning_rate": 0.00013705507203630554, "loss": 0.8439, "step": 112130 }, { "epoch": 1.9687845643357504, "grad_norm": 0.06274214332666163, "learning_rate": 0.00013704469347943762, "loss": 0.8435, "step": 112140 }, { "epoch": 1.9689601292157517, "grad_norm": 0.049815264143497066, "learning_rate": 0.0001370343144658765, "loss": 0.8455, "step": 112150 }, { "epoch": 1.969135694095753, "grad_norm": 0.06035834975044513, "learning_rate": 0.0001370239349957537, "loss": 0.8353, "step": 112160 }, { "epoch": 1.9693112589757544, "grad_norm": 0.05868245528217591, "learning_rate": 0.00013701355506920074, "loss": 0.8413, "step": 112170 }, { "epoch": 1.969486823855756, "grad_norm": 0.06134697673325138, "learning_rate": 0.00013700317468634917, "loss": 0.8383, "step": 112180 }, { "epoch": 1.9696623887357574, "grad_norm": 0.055422040190051855, "learning_rate": 0.00013699279384733046, "loss": 0.8452, "step": 112190 }, { "epoch": 1.9698379536157589, "grad_norm": 0.053009749135581284, "learning_rate": 0.00013698241255227615, "loss": 0.8411, "step": 112200 }, { "epoch": 1.9700135184957601, "grad_norm": 0.05532207092116294, "learning_rate": 0.00013697203080131776, "loss": 0.8457, "step": 112210 }, { "epoch": 1.9701890833757614, "grad_norm": 0.06225810359290014, "learning_rate": 0.00013696164859458684, "loss": 0.8482, "step": 112220 }, { "epoch": 1.9703646482557629, "grad_norm": 0.06508954611006998, "learning_rate": 0.00013695126593221498, "loss": 0.8427, "step": 112230 }, { "epoch": 1.9705402131357643, "grad_norm": 0.05064508561542393, "learning_rate": 0.00013694088281433365, "loss": 0.8567, "step": 112240 }, { "epoch": 1.9707157780157658, "grad_norm": 0.04745319522216446, "learning_rate": 0.00013693049924107448, "loss": 0.8465, "step": 112250 }, { "epoch": 1.9708913428957673, "grad_norm": 0.06296571117194988, "learning_rate": 0.000136920115212569, "loss": 0.8504, "step": 112260 }, { "epoch": 1.9710669077757685, "grad_norm": 0.08091111467866258, "learning_rate": 0.00013690973072894873, "loss": 0.8529, "step": 112270 }, { "epoch": 1.9712424726557698, "grad_norm": 0.06184837537969197, "learning_rate": 0.00013689934579034536, "loss": 0.8459, "step": 112280 }, { "epoch": 1.9714180375357713, "grad_norm": 0.051942807004240484, "learning_rate": 0.00013688896039689032, "loss": 0.8541, "step": 112290 }, { "epoch": 1.9715936024157728, "grad_norm": 0.06665885391822368, "learning_rate": 0.00013687857454871533, "loss": 0.8488, "step": 112300 }, { "epoch": 1.9717691672957742, "grad_norm": 0.05234023747761811, "learning_rate": 0.0001368681882459519, "loss": 0.8488, "step": 112310 }, { "epoch": 1.9719447321757757, "grad_norm": 0.05344553054385083, "learning_rate": 0.00013685780148873171, "loss": 0.8493, "step": 112320 }, { "epoch": 1.972120297055777, "grad_norm": 0.06448900193207546, "learning_rate": 0.0001368474142771862, "loss": 0.8452, "step": 112330 }, { "epoch": 1.9722958619357782, "grad_norm": 0.04640013560323404, "learning_rate": 0.00013683702661144713, "loss": 0.8439, "step": 112340 }, { "epoch": 1.9724714268157797, "grad_norm": 0.0586605637525434, "learning_rate": 0.0001368266384916461, "loss": 0.8387, "step": 112350 }, { "epoch": 1.9726469916957812, "grad_norm": 0.08152566482918713, "learning_rate": 0.00013681624991791465, "loss": 0.843, "step": 112360 }, { "epoch": 1.9728225565757826, "grad_norm": 0.05641902023485922, "learning_rate": 0.00013680586089038447, "loss": 0.8488, "step": 112370 }, { "epoch": 1.9729981214557841, "grad_norm": 0.056450206311494264, "learning_rate": 0.00013679547140918712, "loss": 0.853, "step": 112380 }, { "epoch": 1.9731736863357854, "grad_norm": 0.06712940119443858, "learning_rate": 0.00013678508147445435, "loss": 0.8448, "step": 112390 }, { "epoch": 1.9733492512157866, "grad_norm": 0.059442025640018, "learning_rate": 0.00013677469108631772, "loss": 0.8478, "step": 112400 }, { "epoch": 1.9735248160957881, "grad_norm": 0.05494829901521695, "learning_rate": 0.0001367643002449089, "loss": 0.8527, "step": 112410 }, { "epoch": 1.9737003809757896, "grad_norm": 0.07532894017308209, "learning_rate": 0.00013675390895035954, "loss": 0.8497, "step": 112420 }, { "epoch": 1.973875945855791, "grad_norm": 0.04827355376336661, "learning_rate": 0.00013674351720280133, "loss": 0.8517, "step": 112430 }, { "epoch": 1.9740515107357925, "grad_norm": 0.05325019925914543, "learning_rate": 0.00013673312500236585, "loss": 0.8482, "step": 112440 }, { "epoch": 1.9742270756157938, "grad_norm": 0.04516517864573724, "learning_rate": 0.00013672273234918486, "loss": 0.8468, "step": 112450 }, { "epoch": 1.974402640495795, "grad_norm": 0.062616923633693, "learning_rate": 0.00013671233924339, "loss": 0.8518, "step": 112460 }, { "epoch": 1.9745782053757965, "grad_norm": 0.06138062110513502, "learning_rate": 0.00013670194568511293, "loss": 0.8509, "step": 112470 }, { "epoch": 1.974753770255798, "grad_norm": 0.06641328348026769, "learning_rate": 0.00013669155167448543, "loss": 0.8442, "step": 112480 }, { "epoch": 1.9749293351357995, "grad_norm": 0.05254735037647124, "learning_rate": 0.00013668115721163907, "loss": 0.8545, "step": 112490 }, { "epoch": 1.975104900015801, "grad_norm": 0.05014516364830169, "learning_rate": 0.0001366707622967056, "loss": 0.842, "step": 112500 }, { "epoch": 1.9752804648958022, "grad_norm": 0.045213294320378614, "learning_rate": 0.00013666036692981678, "loss": 0.8513, "step": 112510 }, { "epoch": 1.9754560297758037, "grad_norm": 0.06142220035936826, "learning_rate": 0.0001366499711111043, "loss": 0.8515, "step": 112520 }, { "epoch": 1.975631594655805, "grad_norm": 0.05311307592934878, "learning_rate": 0.0001366395748406998, "loss": 0.8408, "step": 112530 }, { "epoch": 1.9758071595358064, "grad_norm": 0.06067453259053971, "learning_rate": 0.0001366291781187351, "loss": 0.8483, "step": 112540 }, { "epoch": 1.975982724415808, "grad_norm": 0.04705721514124499, "learning_rate": 0.00013661878094534184, "loss": 0.8467, "step": 112550 }, { "epoch": 1.9761582892958094, "grad_norm": 0.05608046049114521, "learning_rate": 0.0001366083833206518, "loss": 0.8457, "step": 112560 }, { "epoch": 1.9763338541758106, "grad_norm": 0.07109562141525895, "learning_rate": 0.00013659798524479676, "loss": 0.853, "step": 112570 }, { "epoch": 1.9765094190558121, "grad_norm": 0.05425655221369997, "learning_rate": 0.00013658758671790836, "loss": 0.8363, "step": 112580 }, { "epoch": 1.9766849839358134, "grad_norm": 0.06076979571738597, "learning_rate": 0.0001365771877401185, "loss": 0.8516, "step": 112590 }, { "epoch": 1.9768605488158149, "grad_norm": 0.05899682487385788, "learning_rate": 0.0001365667883115588, "loss": 0.8482, "step": 112600 }, { "epoch": 1.9770361136958163, "grad_norm": 0.056984966705559005, "learning_rate": 0.00013655638843236106, "loss": 0.8498, "step": 112610 }, { "epoch": 1.9772116785758178, "grad_norm": 0.06154549184322054, "learning_rate": 0.00013654598810265714, "loss": 0.8407, "step": 112620 }, { "epoch": 1.977387243455819, "grad_norm": 0.05305198344610314, "learning_rate": 0.00013653558732257864, "loss": 0.8488, "step": 112630 }, { "epoch": 1.9775628083358205, "grad_norm": 0.050025536757220344, "learning_rate": 0.00013652518609225754, "loss": 0.8499, "step": 112640 }, { "epoch": 1.9777383732158218, "grad_norm": 0.06258042626368888, "learning_rate": 0.00013651478441182547, "loss": 0.8421, "step": 112650 }, { "epoch": 1.9779139380958233, "grad_norm": 0.04461053313000469, "learning_rate": 0.00013650438228141428, "loss": 0.8536, "step": 112660 }, { "epoch": 1.9780895029758248, "grad_norm": 0.06529034180677364, "learning_rate": 0.00013649397970115577, "loss": 0.8391, "step": 112670 }, { "epoch": 1.9782650678558262, "grad_norm": 0.07589689807920065, "learning_rate": 0.0001364835766711817, "loss": 0.8432, "step": 112680 }, { "epoch": 1.9784406327358275, "grad_norm": 0.04867637467160566, "learning_rate": 0.00013647317319162393, "loss": 0.8521, "step": 112690 }, { "epoch": 1.978616197615829, "grad_norm": 0.05630312766723142, "learning_rate": 0.00013646276926261428, "loss": 0.8445, "step": 112700 }, { "epoch": 1.9787917624958302, "grad_norm": 0.048956343834586837, "learning_rate": 0.00013645236488428453, "loss": 0.8452, "step": 112710 }, { "epoch": 1.9789673273758317, "grad_norm": 0.06688692944673606, "learning_rate": 0.0001364419600567665, "loss": 0.8435, "step": 112720 }, { "epoch": 1.9791428922558332, "grad_norm": 0.05997518761702158, "learning_rate": 0.00013643155478019206, "loss": 0.8514, "step": 112730 }, { "epoch": 1.9793184571358347, "grad_norm": 0.06524179363941135, "learning_rate": 0.00013642114905469305, "loss": 0.8499, "step": 112740 }, { "epoch": 1.979494022015836, "grad_norm": 0.047889682893501345, "learning_rate": 0.00013641074288040127, "loss": 0.8504, "step": 112750 }, { "epoch": 1.9796695868958374, "grad_norm": 0.06476962900790542, "learning_rate": 0.00013640033625744862, "loss": 0.8453, "step": 112760 }, { "epoch": 1.9798451517758386, "grad_norm": 0.08940209374492061, "learning_rate": 0.0001363899291859669, "loss": 0.844, "step": 112770 }, { "epoch": 1.9800207166558401, "grad_norm": 0.05380295403122007, "learning_rate": 0.00013637952166608802, "loss": 0.8466, "step": 112780 }, { "epoch": 1.9801962815358416, "grad_norm": 0.04696816727002857, "learning_rate": 0.00013636911369794378, "loss": 0.8457, "step": 112790 }, { "epoch": 1.980371846415843, "grad_norm": 0.048097653270389916, "learning_rate": 0.00013635870528166613, "loss": 0.8485, "step": 112800 }, { "epoch": 1.9805474112958443, "grad_norm": 0.05345709559411261, "learning_rate": 0.0001363482964173869, "loss": 0.8466, "step": 112810 }, { "epoch": 1.9807229761758458, "grad_norm": 0.06359764668736587, "learning_rate": 0.00013633788710523795, "loss": 0.8523, "step": 112820 }, { "epoch": 1.980898541055847, "grad_norm": 0.05095872464464077, "learning_rate": 0.00013632747734535126, "loss": 0.8409, "step": 112830 }, { "epoch": 1.9810741059358485, "grad_norm": 0.0605210526562662, "learning_rate": 0.0001363170671378586, "loss": 0.8483, "step": 112840 }, { "epoch": 1.98124967081585, "grad_norm": 0.07275166049036463, "learning_rate": 0.0001363066564828919, "loss": 0.8469, "step": 112850 }, { "epoch": 1.9814252356958515, "grad_norm": 0.05694802572171876, "learning_rate": 0.0001362962453805832, "loss": 0.8456, "step": 112860 }, { "epoch": 1.981600800575853, "grad_norm": 0.06331686768126674, "learning_rate": 0.00013628583383106424, "loss": 0.8514, "step": 112870 }, { "epoch": 1.9817763654558542, "grad_norm": 0.06567382078638014, "learning_rate": 0.00013627542183446703, "loss": 0.8496, "step": 112880 }, { "epoch": 1.9819519303358555, "grad_norm": 0.0589973633253366, "learning_rate": 0.00013626500939092345, "loss": 0.8529, "step": 112890 }, { "epoch": 1.982127495215857, "grad_norm": 0.052477192636918545, "learning_rate": 0.00013625459650056543, "loss": 0.8493, "step": 112900 }, { "epoch": 1.9823030600958584, "grad_norm": 0.046796588699126763, "learning_rate": 0.00013624418316352492, "loss": 0.8477, "step": 112910 }, { "epoch": 1.98247862497586, "grad_norm": 0.0520443631579435, "learning_rate": 0.00013623376937993385, "loss": 0.8483, "step": 112920 }, { "epoch": 1.9826541898558614, "grad_norm": 0.06478219818683789, "learning_rate": 0.00013622335514992424, "loss": 0.8477, "step": 112930 }, { "epoch": 1.9828297547358626, "grad_norm": 0.05176410026108621, "learning_rate": 0.0001362129404736279, "loss": 0.8457, "step": 112940 }, { "epoch": 1.983005319615864, "grad_norm": 0.05096199104229896, "learning_rate": 0.00013620252535117688, "loss": 0.847, "step": 112950 }, { "epoch": 1.9831808844958654, "grad_norm": 0.04649112624620297, "learning_rate": 0.0001361921097827031, "loss": 0.8442, "step": 112960 }, { "epoch": 1.9833564493758669, "grad_norm": 0.04745320822544568, "learning_rate": 0.00013618169376833852, "loss": 0.845, "step": 112970 }, { "epoch": 1.9835320142558683, "grad_norm": 0.0874565540704111, "learning_rate": 0.00013617127730821516, "loss": 0.8455, "step": 112980 }, { "epoch": 1.9837075791358698, "grad_norm": 0.06487104145718568, "learning_rate": 0.000136160860402465, "loss": 0.8528, "step": 112990 }, { "epoch": 1.983883144015871, "grad_norm": 0.06132186082515845, "learning_rate": 0.00013615044305122, "loss": 0.8425, "step": 113000 }, { "epoch": 1.9840587088958723, "grad_norm": 0.052583871069146235, "learning_rate": 0.0001361400252546121, "loss": 0.8501, "step": 113010 }, { "epoch": 1.9842342737758738, "grad_norm": 0.04862285707600157, "learning_rate": 0.0001361296070127734, "loss": 0.8333, "step": 113020 }, { "epoch": 1.9844098386558753, "grad_norm": 0.0472821307519736, "learning_rate": 0.0001361191883258358, "loss": 0.8422, "step": 113030 }, { "epoch": 1.9845854035358768, "grad_norm": 0.05567995079914193, "learning_rate": 0.00013610876919393144, "loss": 0.8397, "step": 113040 }, { "epoch": 1.9847609684158782, "grad_norm": 0.05893082532672756, "learning_rate": 0.00013609834961719218, "loss": 0.8515, "step": 113050 }, { "epoch": 1.9849365332958795, "grad_norm": 0.047785088558486614, "learning_rate": 0.0001360879295957501, "loss": 0.8411, "step": 113060 }, { "epoch": 1.9851120981758807, "grad_norm": 0.05698853413723527, "learning_rate": 0.0001360775091297372, "loss": 0.8492, "step": 113070 }, { "epoch": 1.9852876630558822, "grad_norm": 0.07032625120067681, "learning_rate": 0.00013606708821928558, "loss": 0.8408, "step": 113080 }, { "epoch": 1.9854632279358837, "grad_norm": 0.06140224469173712, "learning_rate": 0.00013605666686452724, "loss": 0.8426, "step": 113090 }, { "epoch": 1.9856387928158852, "grad_norm": 0.06461662001836235, "learning_rate": 0.0001360462450655942, "loss": 0.8408, "step": 113100 }, { "epoch": 1.9858143576958867, "grad_norm": 0.05357033268772333, "learning_rate": 0.0001360358228226185, "loss": 0.8392, "step": 113110 }, { "epoch": 1.985989922575888, "grad_norm": 0.05280731136624155, "learning_rate": 0.00013602540013573223, "loss": 0.8452, "step": 113120 }, { "epoch": 1.9861654874558892, "grad_norm": 0.05083691871196495, "learning_rate": 0.00013601497700506745, "loss": 0.8429, "step": 113130 }, { "epoch": 1.9863410523358906, "grad_norm": 0.05982377455675431, "learning_rate": 0.00013600455343075612, "loss": 0.8459, "step": 113140 }, { "epoch": 1.9865166172158921, "grad_norm": 0.06679549430252404, "learning_rate": 0.00013599412941293047, "loss": 0.8384, "step": 113150 }, { "epoch": 1.9866921820958936, "grad_norm": 0.062478230366772886, "learning_rate": 0.00013598370495172251, "loss": 0.8514, "step": 113160 }, { "epoch": 1.986867746975895, "grad_norm": 0.05927878761538843, "learning_rate": 0.00013597328004726425, "loss": 0.8436, "step": 113170 }, { "epoch": 1.9870433118558963, "grad_norm": 0.056186843238671555, "learning_rate": 0.0001359628546996878, "loss": 0.8453, "step": 113180 }, { "epoch": 1.9872188767358976, "grad_norm": 0.0489662957662357, "learning_rate": 0.00013595242890912534, "loss": 0.851, "step": 113190 }, { "epoch": 1.987394441615899, "grad_norm": 0.05545039656461873, "learning_rate": 0.0001359420026757089, "loss": 0.8468, "step": 113200 }, { "epoch": 1.9875700064959005, "grad_norm": 0.05497134062028974, "learning_rate": 0.00013593157599957055, "loss": 0.846, "step": 113210 }, { "epoch": 1.987745571375902, "grad_norm": 0.05588599263776602, "learning_rate": 0.0001359211488808425, "loss": 0.8403, "step": 113220 }, { "epoch": 1.9879211362559035, "grad_norm": 0.06951052116175246, "learning_rate": 0.00013591072131965675, "loss": 0.8475, "step": 113230 }, { "epoch": 1.9880967011359048, "grad_norm": 0.05856090499716973, "learning_rate": 0.00013590029331614548, "loss": 0.8512, "step": 113240 }, { "epoch": 1.9882722660159062, "grad_norm": 0.053546904069701216, "learning_rate": 0.00013588986487044082, "loss": 0.8438, "step": 113250 }, { "epoch": 1.9884478308959075, "grad_norm": 0.04801920089532098, "learning_rate": 0.00013587943598267487, "loss": 0.8476, "step": 113260 }, { "epoch": 1.988623395775909, "grad_norm": 0.07622268684859984, "learning_rate": 0.0001358690066529798, "loss": 0.8468, "step": 113270 }, { "epoch": 1.9887989606559104, "grad_norm": 0.06363166585750886, "learning_rate": 0.00013585857688148774, "loss": 0.8498, "step": 113280 }, { "epoch": 1.988974525535912, "grad_norm": 0.07435371805308183, "learning_rate": 0.0001358481466683308, "loss": 0.8501, "step": 113290 }, { "epoch": 1.9891500904159132, "grad_norm": 0.06290964644000084, "learning_rate": 0.00013583771601364115, "loss": 0.8452, "step": 113300 }, { "epoch": 1.9893256552959147, "grad_norm": 0.04790562376505452, "learning_rate": 0.000135827284917551, "loss": 0.8444, "step": 113310 }, { "epoch": 1.989501220175916, "grad_norm": 0.052830277630511804, "learning_rate": 0.00013581685338019244, "loss": 0.8457, "step": 113320 }, { "epoch": 1.9896767850559174, "grad_norm": 0.05864565921898938, "learning_rate": 0.00013580642140169772, "loss": 0.8494, "step": 113330 }, { "epoch": 1.9898523499359189, "grad_norm": 0.05687451092108016, "learning_rate": 0.00013579598898219897, "loss": 0.8461, "step": 113340 }, { "epoch": 1.9900279148159203, "grad_norm": 0.05553310759628339, "learning_rate": 0.00013578555612182833, "loss": 0.8482, "step": 113350 }, { "epoch": 1.9902034796959216, "grad_norm": 0.06605189641358583, "learning_rate": 0.000135775122820718, "loss": 0.8415, "step": 113360 }, { "epoch": 1.990379044575923, "grad_norm": 0.06038150575719236, "learning_rate": 0.0001357646890790003, "loss": 0.8458, "step": 113370 }, { "epoch": 1.9905546094559243, "grad_norm": 0.07408070250145019, "learning_rate": 0.00013575425489680728, "loss": 0.8487, "step": 113380 }, { "epoch": 1.9907301743359258, "grad_norm": 0.07869457394329508, "learning_rate": 0.00013574382027427116, "loss": 0.8493, "step": 113390 }, { "epoch": 1.9909057392159273, "grad_norm": 0.07000849879809619, "learning_rate": 0.0001357333852115242, "loss": 0.8426, "step": 113400 }, { "epoch": 1.9910813040959288, "grad_norm": 0.050163243763052125, "learning_rate": 0.00013572294970869858, "loss": 0.8426, "step": 113410 }, { "epoch": 1.99125686897593, "grad_norm": 0.05111133452457369, "learning_rate": 0.00013571251376592655, "loss": 0.8444, "step": 113420 }, { "epoch": 1.9914324338559315, "grad_norm": 0.04787731569194388, "learning_rate": 0.00013570207738334032, "loss": 0.8456, "step": 113430 }, { "epoch": 1.9916079987359327, "grad_norm": 0.08786222168563011, "learning_rate": 0.00013569164056107213, "loss": 0.8507, "step": 113440 }, { "epoch": 1.9917835636159342, "grad_norm": 0.050146745491604, "learning_rate": 0.00013568120329925419, "loss": 0.8477, "step": 113450 }, { "epoch": 1.9919591284959357, "grad_norm": 0.07170616617284797, "learning_rate": 0.00013567076559801875, "loss": 0.8498, "step": 113460 }, { "epoch": 1.9921346933759372, "grad_norm": 0.05019008208724075, "learning_rate": 0.00013566032745749806, "loss": 0.8452, "step": 113470 }, { "epoch": 1.9923102582559384, "grad_norm": 0.04752802994267239, "learning_rate": 0.00013564988887782437, "loss": 0.848, "step": 113480 }, { "epoch": 1.99248582313594, "grad_norm": 0.04473179781088009, "learning_rate": 0.00013563944985912996, "loss": 0.8417, "step": 113490 }, { "epoch": 1.9926613880159412, "grad_norm": 0.06503432033884965, "learning_rate": 0.00013562901040154712, "loss": 0.8404, "step": 113500 }, { "epoch": 1.9928369528959426, "grad_norm": 0.06509192068622259, "learning_rate": 0.00013561857050520805, "loss": 0.8534, "step": 113510 }, { "epoch": 1.9930125177759441, "grad_norm": 0.05307628776245263, "learning_rate": 0.00013560813017024506, "loss": 0.8408, "step": 113520 }, { "epoch": 1.9931880826559456, "grad_norm": 0.05240323420658199, "learning_rate": 0.00013559768939679038, "loss": 0.8588, "step": 113530 }, { "epoch": 1.9933636475359469, "grad_norm": 0.05475029428534364, "learning_rate": 0.00013558724818497642, "loss": 0.8438, "step": 113540 }, { "epoch": 1.9935392124159483, "grad_norm": 0.05833286532931981, "learning_rate": 0.00013557680653493537, "loss": 0.8309, "step": 113550 }, { "epoch": 1.9937147772959496, "grad_norm": 0.14077713735813713, "learning_rate": 0.00013556636444679956, "loss": 0.8498, "step": 113560 }, { "epoch": 1.993890342175951, "grad_norm": 0.049583402173552, "learning_rate": 0.00013555592192070125, "loss": 0.8409, "step": 113570 }, { "epoch": 1.9940659070559525, "grad_norm": 0.05273531365049017, "learning_rate": 0.0001355454789567728, "loss": 0.8519, "step": 113580 }, { "epoch": 1.994241471935954, "grad_norm": 0.051443593076406675, "learning_rate": 0.00013553503555514655, "loss": 0.8495, "step": 113590 }, { "epoch": 1.9944170368159555, "grad_norm": 0.055307706721711924, "learning_rate": 0.00013552459171595473, "loss": 0.8466, "step": 113600 }, { "epoch": 1.9945926016959568, "grad_norm": 0.06382103446475906, "learning_rate": 0.0001355141474393298, "loss": 0.8492, "step": 113610 }, { "epoch": 1.994768166575958, "grad_norm": 0.06311067394725677, "learning_rate": 0.00013550370272540394, "loss": 0.8474, "step": 113620 }, { "epoch": 1.9949437314559595, "grad_norm": 0.049940949473648466, "learning_rate": 0.00013549325757430958, "loss": 0.8386, "step": 113630 }, { "epoch": 1.995119296335961, "grad_norm": 0.04888330499386549, "learning_rate": 0.000135482811986179, "loss": 0.8442, "step": 113640 }, { "epoch": 1.9952948612159624, "grad_norm": 0.05084791964167364, "learning_rate": 0.0001354723659611446, "loss": 0.8469, "step": 113650 }, { "epoch": 1.995470426095964, "grad_norm": 0.06494654435429727, "learning_rate": 0.00013546191949933873, "loss": 0.839, "step": 113660 }, { "epoch": 1.9956459909759652, "grad_norm": 0.060790159166661695, "learning_rate": 0.00013545147260089372, "loss": 0.8372, "step": 113670 }, { "epoch": 1.9958215558559664, "grad_norm": 0.0750173180028978, "learning_rate": 0.000135441025265942, "loss": 0.846, "step": 113680 }, { "epoch": 1.995997120735968, "grad_norm": 0.05890827825219573, "learning_rate": 0.00013543057749461582, "loss": 0.8551, "step": 113690 }, { "epoch": 1.9961726856159694, "grad_norm": 0.05154412548586042, "learning_rate": 0.00013542012928704762, "loss": 0.8429, "step": 113700 }, { "epoch": 1.9963482504959709, "grad_norm": 0.0795098287776006, "learning_rate": 0.0001354096806433698, "loss": 0.8481, "step": 113710 }, { "epoch": 1.9965238153759723, "grad_norm": 0.04770524572030175, "learning_rate": 0.00013539923156371479, "loss": 0.8485, "step": 113720 }, { "epoch": 1.9966993802559736, "grad_norm": 0.061915312129725085, "learning_rate": 0.00013538878204821488, "loss": 0.8463, "step": 113730 }, { "epoch": 1.9968749451359749, "grad_norm": 0.04545566232544252, "learning_rate": 0.0001353783320970025, "loss": 0.8428, "step": 113740 }, { "epoch": 1.9970505100159763, "grad_norm": 0.04865147066821, "learning_rate": 0.00013536788171021006, "loss": 0.8492, "step": 113750 }, { "epoch": 1.9972260748959778, "grad_norm": 0.05772005922730127, "learning_rate": 0.00013535743088796998, "loss": 0.8437, "step": 113760 }, { "epoch": 1.9974016397759793, "grad_norm": 0.052887716663862286, "learning_rate": 0.00013534697963041466, "loss": 0.8481, "step": 113770 }, { "epoch": 1.9975772046559808, "grad_norm": 0.05409510889263886, "learning_rate": 0.00013533652793767657, "loss": 0.8474, "step": 113780 }, { "epoch": 1.997752769535982, "grad_norm": 0.049500497001010106, "learning_rate": 0.00013532607580988806, "loss": 0.8513, "step": 113790 }, { "epoch": 1.9979283344159833, "grad_norm": 0.07757090466959704, "learning_rate": 0.0001353156232471816, "loss": 0.8474, "step": 113800 }, { "epoch": 1.9981038992959848, "grad_norm": 0.061776725536211566, "learning_rate": 0.0001353051702496896, "loss": 0.8372, "step": 113810 }, { "epoch": 1.9982794641759862, "grad_norm": 0.06178439615651428, "learning_rate": 0.0001352947168175445, "loss": 0.8464, "step": 113820 }, { "epoch": 1.9984550290559877, "grad_norm": 0.07912462738574236, "learning_rate": 0.00013528426295087882, "loss": 0.8436, "step": 113830 }, { "epoch": 1.9986305939359892, "grad_norm": 0.05231637641634119, "learning_rate": 0.00013527380864982493, "loss": 0.8433, "step": 113840 }, { "epoch": 1.9988061588159904, "grad_norm": 0.0662815104211963, "learning_rate": 0.0001352633539145153, "loss": 0.846, "step": 113850 }, { "epoch": 1.9989817236959917, "grad_norm": 0.0458997055838155, "learning_rate": 0.0001352528987450824, "loss": 0.8398, "step": 113860 }, { "epoch": 1.9991572885759932, "grad_norm": 0.07614035736272887, "learning_rate": 0.00013524244314165875, "loss": 0.844, "step": 113870 }, { "epoch": 1.9993328534559947, "grad_norm": 0.06490601276969686, "learning_rate": 0.00013523198710437676, "loss": 0.8393, "step": 113880 }, { "epoch": 1.9995084183359961, "grad_norm": 0.05782898867264072, "learning_rate": 0.00013522153063336896, "loss": 0.8512, "step": 113890 }, { "epoch": 1.9996839832159976, "grad_norm": 0.04503327316327342, "learning_rate": 0.0001352110737287678, "loss": 0.8485, "step": 113900 }, { "epoch": 1.9998595480959989, "grad_norm": 0.07053254884046814, "learning_rate": 0.00013520061639070574, "loss": 0.8464, "step": 113910 }, { "epoch": 2.000035112976, "grad_norm": 0.0606326042215767, "learning_rate": 0.00013519015861931534, "loss": 0.843, "step": 113920 }, { "epoch": 2.0002106778560016, "grad_norm": 0.058836147346170244, "learning_rate": 0.00013517970041472907, "loss": 0.8522, "step": 113930 }, { "epoch": 2.000386242736003, "grad_norm": 0.044514210430623855, "learning_rate": 0.00013516924177707946, "loss": 0.8456, "step": 113940 }, { "epoch": 2.0005618076160046, "grad_norm": 0.05777230690172198, "learning_rate": 0.00013515878270649906, "loss": 0.8451, "step": 113950 }, { "epoch": 2.000737372496006, "grad_norm": 0.0502243830580969, "learning_rate": 0.00013514832320312022, "loss": 0.854, "step": 113960 }, { "epoch": 2.0009129373760075, "grad_norm": 0.06955115754152005, "learning_rate": 0.00013513786326707568, "loss": 0.8539, "step": 113970 }, { "epoch": 2.0010885022560085, "grad_norm": 0.04790961120870193, "learning_rate": 0.0001351274028984978, "loss": 0.8446, "step": 113980 }, { "epoch": 2.00126406713601, "grad_norm": 0.06665954622937104, "learning_rate": 0.0001351169420975192, "loss": 0.8463, "step": 113990 }, { "epoch": 2.0014396320160115, "grad_norm": 0.06310645904546874, "learning_rate": 0.0001351064808642724, "loss": 0.8441, "step": 114000 }, { "epoch": 2.001615196896013, "grad_norm": 0.044112943451601806, "learning_rate": 0.00013509601919889, "loss": 0.8525, "step": 114010 }, { "epoch": 2.0017907617760144, "grad_norm": 0.06093453613962986, "learning_rate": 0.0001350855571015045, "loss": 0.8429, "step": 114020 }, { "epoch": 2.001966326656016, "grad_norm": 0.06150864102800133, "learning_rate": 0.0001350750945722484, "loss": 0.8429, "step": 114030 }, { "epoch": 2.002141891536017, "grad_norm": 0.05229750838209784, "learning_rate": 0.00013506463161125435, "loss": 0.8443, "step": 114040 }, { "epoch": 2.0023174564160184, "grad_norm": 0.07207908972500637, "learning_rate": 0.0001350541682186549, "loss": 0.8479, "step": 114050 }, { "epoch": 2.00249302129602, "grad_norm": 0.061654797840156296, "learning_rate": 0.0001350437043945826, "loss": 0.8476, "step": 114060 }, { "epoch": 2.0026685861760214, "grad_norm": 0.05993847115616903, "learning_rate": 0.00013503324013917006, "loss": 0.8499, "step": 114070 }, { "epoch": 2.002844151056023, "grad_norm": 0.052604870392052284, "learning_rate": 0.00013502277545254982, "loss": 0.8415, "step": 114080 }, { "epoch": 2.0030197159360243, "grad_norm": 0.06461825526203878, "learning_rate": 0.0001350123103348545, "loss": 0.8473, "step": 114090 }, { "epoch": 2.0031952808160254, "grad_norm": 0.060306963996115626, "learning_rate": 0.0001350018447862167, "loss": 0.8529, "step": 114100 }, { "epoch": 2.003370845696027, "grad_norm": 0.08836145389322259, "learning_rate": 0.000134991378806769, "loss": 0.8458, "step": 114110 }, { "epoch": 2.0035464105760283, "grad_norm": 0.09286037855120095, "learning_rate": 0.00013498091239664403, "loss": 0.8471, "step": 114120 }, { "epoch": 2.00372197545603, "grad_norm": 0.06145041644904422, "learning_rate": 0.00013497044555597438, "loss": 0.85, "step": 114130 }, { "epoch": 2.0038975403360313, "grad_norm": 0.049034754672637605, "learning_rate": 0.0001349599782848927, "loss": 0.8379, "step": 114140 }, { "epoch": 2.0040731052160328, "grad_norm": 0.061322893966892555, "learning_rate": 0.00013494951058353152, "loss": 0.8477, "step": 114150 }, { "epoch": 2.004248670096034, "grad_norm": 0.05212551232791151, "learning_rate": 0.00013493904245202355, "loss": 0.8382, "step": 114160 }, { "epoch": 2.0044242349760353, "grad_norm": 0.04923527604975416, "learning_rate": 0.00013492857389050145, "loss": 0.8424, "step": 114170 }, { "epoch": 2.0045997998560368, "grad_norm": 0.05638140199273135, "learning_rate": 0.0001349181048990978, "loss": 0.8476, "step": 114180 }, { "epoch": 2.0047753647360382, "grad_norm": 0.06512216120070365, "learning_rate": 0.00013490763547794528, "loss": 0.8412, "step": 114190 }, { "epoch": 2.0049509296160397, "grad_norm": 0.053732995663984706, "learning_rate": 0.00013489716562717648, "loss": 0.8494, "step": 114200 }, { "epoch": 2.005126494496041, "grad_norm": 0.05985599047769587, "learning_rate": 0.0001348866953469241, "loss": 0.8518, "step": 114210 }, { "epoch": 2.0053020593760422, "grad_norm": 0.06915741579989299, "learning_rate": 0.00013487622463732084, "loss": 0.8454, "step": 114220 }, { "epoch": 2.0054776242560437, "grad_norm": 0.049260058539230524, "learning_rate": 0.0001348657534984993, "loss": 0.8447, "step": 114230 }, { "epoch": 2.005653189136045, "grad_norm": 0.07321162544386986, "learning_rate": 0.00013485528193059215, "loss": 0.8537, "step": 114240 }, { "epoch": 2.0058287540160467, "grad_norm": 0.04416521541643712, "learning_rate": 0.0001348448099337321, "loss": 0.8562, "step": 114250 }, { "epoch": 2.006004318896048, "grad_norm": 0.051233888064597444, "learning_rate": 0.00013483433750805182, "loss": 0.8489, "step": 114260 }, { "epoch": 2.0061798837760496, "grad_norm": 0.07776902567710309, "learning_rate": 0.00013482386465368403, "loss": 0.8443, "step": 114270 }, { "epoch": 2.0063554486560506, "grad_norm": 0.05624012643898958, "learning_rate": 0.00013481339137076133, "loss": 0.8513, "step": 114280 }, { "epoch": 2.006531013536052, "grad_norm": 0.06466818814883778, "learning_rate": 0.00013480291765941658, "loss": 0.8452, "step": 114290 }, { "epoch": 2.0067065784160536, "grad_norm": 0.060343804075031295, "learning_rate": 0.00013479244351978227, "loss": 0.8458, "step": 114300 }, { "epoch": 2.006882143296055, "grad_norm": 0.07616588654708759, "learning_rate": 0.0001347819689519913, "loss": 0.8464, "step": 114310 }, { "epoch": 2.0070577081760566, "grad_norm": 0.06208738496621342, "learning_rate": 0.00013477149395617628, "loss": 0.8458, "step": 114320 }, { "epoch": 2.007233273056058, "grad_norm": 0.04949669835570067, "learning_rate": 0.00013476101853246994, "loss": 0.8421, "step": 114330 }, { "epoch": 2.007408837936059, "grad_norm": 0.08528483250846157, "learning_rate": 0.00013475054268100506, "loss": 0.8535, "step": 114340 }, { "epoch": 2.0075844028160605, "grad_norm": 0.29097020814177377, "learning_rate": 0.00013474006640191435, "loss": 0.8452, "step": 114350 }, { "epoch": 2.007759967696062, "grad_norm": 0.05365254475189281, "learning_rate": 0.0001347295896953305, "loss": 0.8406, "step": 114360 }, { "epoch": 2.0079355325760635, "grad_norm": 0.05382612429457709, "learning_rate": 0.0001347191125613863, "loss": 0.8535, "step": 114370 }, { "epoch": 2.008111097456065, "grad_norm": 0.05424787582843832, "learning_rate": 0.00013470863500021445, "loss": 0.8496, "step": 114380 }, { "epoch": 2.0082866623360665, "grad_norm": 0.05137209334914331, "learning_rate": 0.00013469815701194776, "loss": 0.8456, "step": 114390 }, { "epoch": 2.0084622272160675, "grad_norm": 0.05126529508172749, "learning_rate": 0.00013468767859671897, "loss": 0.8479, "step": 114400 }, { "epoch": 2.008637792096069, "grad_norm": 0.062175359237212374, "learning_rate": 0.00013467719975466085, "loss": 0.8458, "step": 114410 }, { "epoch": 2.0088133569760704, "grad_norm": 0.06270036048786773, "learning_rate": 0.00013466672048590613, "loss": 0.8372, "step": 114420 }, { "epoch": 2.008988921856072, "grad_norm": 0.07065660048532044, "learning_rate": 0.0001346562407905876, "loss": 0.8448, "step": 114430 }, { "epoch": 2.0091644867360734, "grad_norm": 0.08576976190254285, "learning_rate": 0.00013464576066883805, "loss": 0.8484, "step": 114440 }, { "epoch": 2.009340051616075, "grad_norm": 0.0721886926160602, "learning_rate": 0.00013463528012079028, "loss": 0.8478, "step": 114450 }, { "epoch": 2.009515616496076, "grad_norm": 0.050288629384035985, "learning_rate": 0.00013462479914657708, "loss": 0.8412, "step": 114460 }, { "epoch": 2.0096911813760774, "grad_norm": 0.05126094509481073, "learning_rate": 0.0001346143177463312, "loss": 0.8506, "step": 114470 }, { "epoch": 2.009866746256079, "grad_norm": 0.04262736938493137, "learning_rate": 0.0001346038359201855, "loss": 0.8481, "step": 114480 }, { "epoch": 2.0100423111360803, "grad_norm": 0.0431808176996709, "learning_rate": 0.0001345933536682727, "loss": 0.8484, "step": 114490 }, { "epoch": 2.010217876016082, "grad_norm": 0.073128948536251, "learning_rate": 0.0001345828709907257, "loss": 0.8468, "step": 114500 }, { "epoch": 2.0103934408960833, "grad_norm": 0.06269174452867857, "learning_rate": 0.0001345723878876773, "loss": 0.8482, "step": 114510 }, { "epoch": 2.0105690057760848, "grad_norm": 0.04559977122566213, "learning_rate": 0.00013456190435926036, "loss": 0.8514, "step": 114520 }, { "epoch": 2.010744570656086, "grad_norm": 0.045154179152875146, "learning_rate": 0.0001345514204056076, "loss": 0.8419, "step": 114530 }, { "epoch": 2.0109201355360873, "grad_norm": 0.052095697519621016, "learning_rate": 0.00013454093602685194, "loss": 0.8432, "step": 114540 }, { "epoch": 2.0110957004160888, "grad_norm": 0.04975314424321105, "learning_rate": 0.00013453045122312614, "loss": 0.8444, "step": 114550 }, { "epoch": 2.0112712652960902, "grad_norm": 0.05745755712307473, "learning_rate": 0.00013451996599456314, "loss": 0.849, "step": 114560 }, { "epoch": 2.0114468301760917, "grad_norm": 0.06201977290482736, "learning_rate": 0.00013450948034129576, "loss": 0.846, "step": 114570 }, { "epoch": 2.011622395056093, "grad_norm": 0.05308793158567267, "learning_rate": 0.00013449899426345682, "loss": 0.8512, "step": 114580 }, { "epoch": 2.0117979599360942, "grad_norm": 0.04944267162345111, "learning_rate": 0.00013448850776117918, "loss": 0.8506, "step": 114590 }, { "epoch": 2.0119735248160957, "grad_norm": 0.04476102678855464, "learning_rate": 0.00013447802083459576, "loss": 0.8334, "step": 114600 }, { "epoch": 2.012149089696097, "grad_norm": 0.05606315520393755, "learning_rate": 0.0001344675334838394, "loss": 0.8412, "step": 114610 }, { "epoch": 2.0123246545760987, "grad_norm": 0.05685905632906982, "learning_rate": 0.00013445704570904296, "loss": 0.8496, "step": 114620 }, { "epoch": 2.0125002194561, "grad_norm": 0.0734451718413507, "learning_rate": 0.0001344465575103394, "loss": 0.8422, "step": 114630 }, { "epoch": 2.0126757843361016, "grad_norm": 0.05473215171140964, "learning_rate": 0.00013443606888786146, "loss": 0.8556, "step": 114640 }, { "epoch": 2.0128513492161026, "grad_norm": 0.05294759787718478, "learning_rate": 0.00013442557984174217, "loss": 0.8396, "step": 114650 }, { "epoch": 2.013026914096104, "grad_norm": 0.05347678201464379, "learning_rate": 0.00013441509037211434, "loss": 0.8454, "step": 114660 }, { "epoch": 2.0132024789761056, "grad_norm": 0.059919025087596445, "learning_rate": 0.00013440460047911093, "loss": 0.8503, "step": 114670 }, { "epoch": 2.013378043856107, "grad_norm": 0.05095874032247942, "learning_rate": 0.00013439411016286486, "loss": 0.8494, "step": 114680 }, { "epoch": 2.0135536087361086, "grad_norm": 0.06607182344166838, "learning_rate": 0.00013438361942350898, "loss": 0.8465, "step": 114690 }, { "epoch": 2.01372917361611, "grad_norm": 0.057785898319845784, "learning_rate": 0.00013437312826117625, "loss": 0.8526, "step": 114700 }, { "epoch": 2.013904738496111, "grad_norm": 0.0781211512870403, "learning_rate": 0.00013436263667599959, "loss": 0.8561, "step": 114710 }, { "epoch": 2.0140803033761125, "grad_norm": 0.06452074092178503, "learning_rate": 0.00013435214466811193, "loss": 0.8534, "step": 114720 }, { "epoch": 2.014255868256114, "grad_norm": 0.04572098735602519, "learning_rate": 0.0001343416522376462, "loss": 0.8453, "step": 114730 }, { "epoch": 2.0144314331361155, "grad_norm": 0.05654720246574256, "learning_rate": 0.00013433115938473538, "loss": 0.851, "step": 114740 }, { "epoch": 2.014606998016117, "grad_norm": 0.05815307110010233, "learning_rate": 0.00013432066610951233, "loss": 0.855, "step": 114750 }, { "epoch": 2.0147825628961185, "grad_norm": 0.07833316220779164, "learning_rate": 0.00013431017241211007, "loss": 0.8412, "step": 114760 }, { "epoch": 2.0149581277761195, "grad_norm": 0.06269227788139098, "learning_rate": 0.00013429967829266154, "loss": 0.8515, "step": 114770 }, { "epoch": 2.015133692656121, "grad_norm": 0.06583459935517852, "learning_rate": 0.00013428918375129974, "loss": 0.8531, "step": 114780 }, { "epoch": 2.0153092575361224, "grad_norm": 0.05590005707062792, "learning_rate": 0.00013427868878815758, "loss": 0.8491, "step": 114790 }, { "epoch": 2.015484822416124, "grad_norm": 0.057171093498978, "learning_rate": 0.00013426819340336808, "loss": 0.8441, "step": 114800 }, { "epoch": 2.0156603872961254, "grad_norm": 0.0673890045774469, "learning_rate": 0.00013425769759706414, "loss": 0.8466, "step": 114810 }, { "epoch": 2.015835952176127, "grad_norm": 0.09249265438384394, "learning_rate": 0.00013424720136937882, "loss": 0.8437, "step": 114820 }, { "epoch": 2.016011517056128, "grad_norm": 0.06066608688458283, "learning_rate": 0.00013423670472044507, "loss": 0.8519, "step": 114830 }, { "epoch": 2.0161870819361294, "grad_norm": 0.06628922226591351, "learning_rate": 0.00013422620765039594, "loss": 0.8442, "step": 114840 }, { "epoch": 2.016362646816131, "grad_norm": 0.06389109337263413, "learning_rate": 0.00013421571015936437, "loss": 0.8504, "step": 114850 }, { "epoch": 2.0165382116961323, "grad_norm": 0.04792761571629373, "learning_rate": 0.0001342052122474834, "loss": 0.8451, "step": 114860 }, { "epoch": 2.016713776576134, "grad_norm": 0.058215515000586075, "learning_rate": 0.00013419471391488603, "loss": 0.8469, "step": 114870 }, { "epoch": 2.0168893414561353, "grad_norm": 0.053044099058293624, "learning_rate": 0.00013418421516170524, "loss": 0.8491, "step": 114880 }, { "epoch": 2.0170649063361363, "grad_norm": 0.04929495799488826, "learning_rate": 0.0001341737159880741, "loss": 0.8495, "step": 114890 }, { "epoch": 2.017240471216138, "grad_norm": 0.06880087881004598, "learning_rate": 0.00013416321639412562, "loss": 0.8439, "step": 114900 }, { "epoch": 2.0174160360961393, "grad_norm": 0.052465731460408206, "learning_rate": 0.00013415271637999286, "loss": 0.8452, "step": 114910 }, { "epoch": 2.0175916009761408, "grad_norm": 0.0665734467835106, "learning_rate": 0.00013414221594580883, "loss": 0.8422, "step": 114920 }, { "epoch": 2.0177671658561422, "grad_norm": 0.06740423812929837, "learning_rate": 0.00013413171509170653, "loss": 0.8406, "step": 114930 }, { "epoch": 2.0179427307361437, "grad_norm": 0.062172901040825145, "learning_rate": 0.00013412121381781904, "loss": 0.8426, "step": 114940 }, { "epoch": 2.0181182956161448, "grad_norm": 0.08665204389214046, "learning_rate": 0.00013411071212427947, "loss": 0.8462, "step": 114950 }, { "epoch": 2.0182938604961462, "grad_norm": 0.055718351648822356, "learning_rate": 0.0001341002100112208, "loss": 0.8503, "step": 114960 }, { "epoch": 2.0184694253761477, "grad_norm": 0.06020372985501083, "learning_rate": 0.00013408970747877616, "loss": 0.846, "step": 114970 }, { "epoch": 2.018644990256149, "grad_norm": 0.053606728240258604, "learning_rate": 0.00013407920452707857, "loss": 0.8512, "step": 114980 }, { "epoch": 2.0188205551361507, "grad_norm": 0.06222561441000506, "learning_rate": 0.0001340687011562611, "loss": 0.8428, "step": 114990 }, { "epoch": 2.018996120016152, "grad_norm": 0.05435892529729845, "learning_rate": 0.00013405819736645686, "loss": 0.8489, "step": 115000 }, { "epoch": 2.019171684896153, "grad_norm": 0.04496920930288166, "learning_rate": 0.0001340476931577989, "loss": 0.8514, "step": 115010 }, { "epoch": 2.0193472497761547, "grad_norm": 0.06001170005726854, "learning_rate": 0.00013403718853042038, "loss": 0.8467, "step": 115020 }, { "epoch": 2.019522814656156, "grad_norm": 0.05831129050299066, "learning_rate": 0.00013402668348445432, "loss": 0.8475, "step": 115030 }, { "epoch": 2.0196983795361576, "grad_norm": 0.046602917701931985, "learning_rate": 0.00013401617802003387, "loss": 0.8427, "step": 115040 }, { "epoch": 2.019873944416159, "grad_norm": 0.054032970087813804, "learning_rate": 0.00013400567213729211, "loss": 0.8448, "step": 115050 }, { "epoch": 2.0200495092961606, "grad_norm": 0.058670470769464224, "learning_rate": 0.00013399516583636215, "loss": 0.8419, "step": 115060 }, { "epoch": 2.0202250741761616, "grad_norm": 0.07714800254442357, "learning_rate": 0.00013398465911737713, "loss": 0.8507, "step": 115070 }, { "epoch": 2.020400639056163, "grad_norm": 0.05243889645617048, "learning_rate": 0.00013397415198047018, "loss": 0.846, "step": 115080 }, { "epoch": 2.0205762039361645, "grad_norm": 0.05485107780486826, "learning_rate": 0.00013396364442577434, "loss": 0.8471, "step": 115090 }, { "epoch": 2.020751768816166, "grad_norm": 0.06190259203889378, "learning_rate": 0.00013395313645342286, "loss": 0.8487, "step": 115100 }, { "epoch": 2.0209273336961675, "grad_norm": 0.04696627911334669, "learning_rate": 0.0001339426280635488, "loss": 0.8485, "step": 115110 }, { "epoch": 2.021102898576169, "grad_norm": 0.08685885702083879, "learning_rate": 0.00013393211925628534, "loss": 0.8491, "step": 115120 }, { "epoch": 2.02127846345617, "grad_norm": 0.05454289342799106, "learning_rate": 0.0001339216100317656, "loss": 0.8387, "step": 115130 }, { "epoch": 2.0214540283361715, "grad_norm": 0.05337349083695242, "learning_rate": 0.0001339111003901228, "loss": 0.8476, "step": 115140 }, { "epoch": 2.021629593216173, "grad_norm": 0.058476467604367095, "learning_rate": 0.00013390059033149, "loss": 0.8471, "step": 115150 }, { "epoch": 2.0218051580961744, "grad_norm": 0.05058513493130815, "learning_rate": 0.00013389007985600043, "loss": 0.8451, "step": 115160 }, { "epoch": 2.021980722976176, "grad_norm": 0.06623775978297361, "learning_rate": 0.00013387956896378721, "loss": 0.8456, "step": 115170 }, { "epoch": 2.0221562878561774, "grad_norm": 0.05972348019226066, "learning_rate": 0.0001338690576549836, "loss": 0.8483, "step": 115180 }, { "epoch": 2.0223318527361784, "grad_norm": 0.05722414462023936, "learning_rate": 0.00013385854592972273, "loss": 0.8415, "step": 115190 }, { "epoch": 2.02250741761618, "grad_norm": 0.074708931394996, "learning_rate": 0.00013384803378813778, "loss": 0.8509, "step": 115200 }, { "epoch": 2.0226829824961814, "grad_norm": 0.05985430729877331, "learning_rate": 0.00013383752123036194, "loss": 0.8482, "step": 115210 }, { "epoch": 2.022858547376183, "grad_norm": 0.06988622583481423, "learning_rate": 0.00013382700825652837, "loss": 0.8533, "step": 115220 }, { "epoch": 2.0230341122561843, "grad_norm": 0.06480394334015178, "learning_rate": 0.00013381649486677037, "loss": 0.8475, "step": 115230 }, { "epoch": 2.023209677136186, "grad_norm": 0.0560321081684587, "learning_rate": 0.0001338059810612211, "loss": 0.8483, "step": 115240 }, { "epoch": 2.0233852420161873, "grad_norm": 0.06913458446008973, "learning_rate": 0.00013379546684001375, "loss": 0.8447, "step": 115250 }, { "epoch": 2.0235608068961883, "grad_norm": 0.06700175372101036, "learning_rate": 0.00013378495220328152, "loss": 0.8415, "step": 115260 }, { "epoch": 2.02373637177619, "grad_norm": 0.05873468343397788, "learning_rate": 0.00013377443715115766, "loss": 0.84, "step": 115270 }, { "epoch": 2.0239119366561913, "grad_norm": 0.054762037451959705, "learning_rate": 0.00013376392168377546, "loss": 0.8397, "step": 115280 }, { "epoch": 2.0240875015361928, "grad_norm": 0.04676265574368405, "learning_rate": 0.00013375340580126804, "loss": 0.842, "step": 115290 }, { "epoch": 2.0242630664161942, "grad_norm": 0.055341930259218144, "learning_rate": 0.0001337428895037687, "loss": 0.8511, "step": 115300 }, { "epoch": 2.0244386312961957, "grad_norm": 0.047587102510171564, "learning_rate": 0.00013373237279141068, "loss": 0.8459, "step": 115310 }, { "epoch": 2.0246141961761968, "grad_norm": 0.05659087013807943, "learning_rate": 0.00013372185566432724, "loss": 0.8543, "step": 115320 }, { "epoch": 2.0247897610561982, "grad_norm": 0.048208045573658355, "learning_rate": 0.0001337113381226516, "loss": 0.8456, "step": 115330 }, { "epoch": 2.0249653259361997, "grad_norm": 0.04919676323508185, "learning_rate": 0.00013370082016651703, "loss": 0.8408, "step": 115340 }, { "epoch": 2.025140890816201, "grad_norm": 0.05787053045918171, "learning_rate": 0.00013369030179605676, "loss": 0.8383, "step": 115350 }, { "epoch": 2.0253164556962027, "grad_norm": 0.04660183490528461, "learning_rate": 0.0001336797830114042, "loss": 0.8416, "step": 115360 }, { "epoch": 2.025492020576204, "grad_norm": 0.05928035975055669, "learning_rate": 0.00013366926381269247, "loss": 0.8445, "step": 115370 }, { "epoch": 2.025667585456205, "grad_norm": 0.05282926019146055, "learning_rate": 0.00013365874420005492, "loss": 0.8497, "step": 115380 }, { "epoch": 2.0258431503362067, "grad_norm": 0.04811007436059055, "learning_rate": 0.0001336482241736248, "loss": 0.8467, "step": 115390 }, { "epoch": 2.026018715216208, "grad_norm": 0.04749536452083036, "learning_rate": 0.00013363770373353546, "loss": 0.8437, "step": 115400 }, { "epoch": 2.0261942800962096, "grad_norm": 0.06518996281320225, "learning_rate": 0.00013362718287992012, "loss": 0.847, "step": 115410 }, { "epoch": 2.026369844976211, "grad_norm": 0.059305433976548284, "learning_rate": 0.00013361666161291216, "loss": 0.8502, "step": 115420 }, { "epoch": 2.0265454098562126, "grad_norm": 0.04821230808203083, "learning_rate": 0.00013360613993264483, "loss": 0.8379, "step": 115430 }, { "epoch": 2.0267209747362136, "grad_norm": 0.06640324361371842, "learning_rate": 0.00013359561783925146, "loss": 0.8429, "step": 115440 }, { "epoch": 2.026896539616215, "grad_norm": 0.05189061545948313, "learning_rate": 0.00013358509533286533, "loss": 0.8438, "step": 115450 }, { "epoch": 2.0270721044962166, "grad_norm": 0.06265166799819974, "learning_rate": 0.0001335745724136198, "loss": 0.8512, "step": 115460 }, { "epoch": 2.027247669376218, "grad_norm": 0.05987172363399539, "learning_rate": 0.00013356404908164825, "loss": 0.8386, "step": 115470 }, { "epoch": 2.0274232342562195, "grad_norm": 0.06343860259308753, "learning_rate": 0.00013355352533708393, "loss": 0.8378, "step": 115480 }, { "epoch": 2.027598799136221, "grad_norm": 0.05248691689378751, "learning_rate": 0.0001335430011800602, "loss": 0.8473, "step": 115490 }, { "epoch": 2.027774364016222, "grad_norm": 0.06930778911479617, "learning_rate": 0.00013353247661071043, "loss": 0.8448, "step": 115500 }, { "epoch": 2.0279499288962235, "grad_norm": 0.07012236197470272, "learning_rate": 0.0001335219516291679, "loss": 0.8513, "step": 115510 }, { "epoch": 2.028125493776225, "grad_norm": 0.0721906435922019, "learning_rate": 0.000133511426235566, "loss": 0.8435, "step": 115520 }, { "epoch": 2.0283010586562265, "grad_norm": 0.07284340100786416, "learning_rate": 0.00013350090043003817, "loss": 0.8455, "step": 115530 }, { "epoch": 2.028476623536228, "grad_norm": 0.06104836535884297, "learning_rate": 0.0001334903742127177, "loss": 0.8493, "step": 115540 }, { "epoch": 2.0286521884162294, "grad_norm": 0.04364158793472262, "learning_rate": 0.0001334798475837379, "loss": 0.8482, "step": 115550 }, { "epoch": 2.0288277532962304, "grad_norm": 0.050887870211579075, "learning_rate": 0.00013346932054323222, "loss": 0.8412, "step": 115560 }, { "epoch": 2.029003318176232, "grad_norm": 0.04773907218193663, "learning_rate": 0.00013345879309133404, "loss": 0.8454, "step": 115570 }, { "epoch": 2.0291788830562334, "grad_norm": 0.05345660103102053, "learning_rate": 0.00013344826522817673, "loss": 0.841, "step": 115580 }, { "epoch": 2.029354447936235, "grad_norm": 0.06247476754439574, "learning_rate": 0.0001334377369538937, "loss": 0.847, "step": 115590 }, { "epoch": 2.0295300128162364, "grad_norm": 0.062369824021922225, "learning_rate": 0.00013342720826861832, "loss": 0.8448, "step": 115600 }, { "epoch": 2.029705577696238, "grad_norm": 0.04840204781153707, "learning_rate": 0.00013341667917248398, "loss": 0.8525, "step": 115610 }, { "epoch": 2.029881142576239, "grad_norm": 0.04772455444270228, "learning_rate": 0.00013340614966562408, "loss": 0.8465, "step": 115620 }, { "epoch": 2.0300567074562403, "grad_norm": 0.056017273502248825, "learning_rate": 0.0001333956197481721, "loss": 0.8492, "step": 115630 }, { "epoch": 2.030232272336242, "grad_norm": 0.046453222512950774, "learning_rate": 0.00013338508942026137, "loss": 0.8562, "step": 115640 }, { "epoch": 2.0304078372162433, "grad_norm": 0.0483932539119913, "learning_rate": 0.0001333745586820254, "loss": 0.8459, "step": 115650 }, { "epoch": 2.0305834020962448, "grad_norm": 0.05727429214033881, "learning_rate": 0.00013336402753359755, "loss": 0.8497, "step": 115660 }, { "epoch": 2.0307589669762462, "grad_norm": 0.058478551270248265, "learning_rate": 0.00013335349597511128, "loss": 0.8454, "step": 115670 }, { "epoch": 2.0309345318562473, "grad_norm": 0.08264964847794652, "learning_rate": 0.00013334296400669996, "loss": 0.8488, "step": 115680 }, { "epoch": 2.0311100967362488, "grad_norm": 0.06769458548360013, "learning_rate": 0.0001333324316284971, "loss": 0.8442, "step": 115690 }, { "epoch": 2.0312856616162502, "grad_norm": 0.06123485408426817, "learning_rate": 0.00013332189884063616, "loss": 0.8426, "step": 115700 }, { "epoch": 2.0314612264962517, "grad_norm": 0.04830995500866124, "learning_rate": 0.00013331136564325057, "loss": 0.8587, "step": 115710 }, { "epoch": 2.031636791376253, "grad_norm": 0.05807789595576253, "learning_rate": 0.00013330083203647378, "loss": 0.8363, "step": 115720 }, { "epoch": 2.0318123562562547, "grad_norm": 0.06291654448699847, "learning_rate": 0.00013329029802043924, "loss": 0.8431, "step": 115730 }, { "epoch": 2.0319879211362557, "grad_norm": 0.0554771292764982, "learning_rate": 0.00013327976359528047, "loss": 0.849, "step": 115740 }, { "epoch": 2.032163486016257, "grad_norm": 0.06242007315761199, "learning_rate": 0.00013326922876113088, "loss": 0.844, "step": 115750 }, { "epoch": 2.0323390508962587, "grad_norm": 0.07609717724976298, "learning_rate": 0.00013325869351812398, "loss": 0.8552, "step": 115760 }, { "epoch": 2.03251461577626, "grad_norm": 0.05656908145305979, "learning_rate": 0.00013324815786639328, "loss": 0.8519, "step": 115770 }, { "epoch": 2.0326901806562616, "grad_norm": 0.059050945524467484, "learning_rate": 0.00013323762180607223, "loss": 0.8488, "step": 115780 }, { "epoch": 2.032865745536263, "grad_norm": 0.05437253578124569, "learning_rate": 0.0001332270853372943, "loss": 0.8491, "step": 115790 }, { "epoch": 2.033041310416264, "grad_norm": 0.05508091614683385, "learning_rate": 0.00013321654846019305, "loss": 0.8403, "step": 115800 }, { "epoch": 2.0332168752962656, "grad_norm": 0.04808424088507107, "learning_rate": 0.0001332060111749019, "loss": 0.8454, "step": 115810 }, { "epoch": 2.033392440176267, "grad_norm": 0.06110371474594997, "learning_rate": 0.00013319547348155452, "loss": 0.8516, "step": 115820 }, { "epoch": 2.0335680050562686, "grad_norm": 0.08323995711292181, "learning_rate": 0.00013318493538028425, "loss": 0.8467, "step": 115830 }, { "epoch": 2.03374356993627, "grad_norm": 0.049480264693308275, "learning_rate": 0.0001331743968712247, "loss": 0.8509, "step": 115840 }, { "epoch": 2.0339191348162715, "grad_norm": 0.05607720482521311, "learning_rate": 0.00013316385795450938, "loss": 0.8524, "step": 115850 }, { "epoch": 2.0340946996962725, "grad_norm": 0.04698563640020885, "learning_rate": 0.00013315331863027178, "loss": 0.8456, "step": 115860 }, { "epoch": 2.034270264576274, "grad_norm": 0.05841459968649828, "learning_rate": 0.0001331427788986455, "loss": 0.8395, "step": 115870 }, { "epoch": 2.0344458294562755, "grad_norm": 0.05181330525803007, "learning_rate": 0.00013313223875976407, "loss": 0.8414, "step": 115880 }, { "epoch": 2.034621394336277, "grad_norm": 0.05583067659385761, "learning_rate": 0.000133121698213761, "loss": 0.8486, "step": 115890 }, { "epoch": 2.0347969592162785, "grad_norm": 0.051815545555068024, "learning_rate": 0.00013311115726076983, "loss": 0.8424, "step": 115900 }, { "epoch": 2.03497252409628, "grad_norm": 0.07933082776830837, "learning_rate": 0.00013310061590092418, "loss": 0.851, "step": 115910 }, { "epoch": 2.035148088976281, "grad_norm": 0.048959342121175954, "learning_rate": 0.00013309007413435755, "loss": 0.8453, "step": 115920 }, { "epoch": 2.0353236538562824, "grad_norm": 0.06601770264334351, "learning_rate": 0.00013307953196120355, "loss": 0.8411, "step": 115930 }, { "epoch": 2.035499218736284, "grad_norm": 0.057703558929822395, "learning_rate": 0.00013306898938159574, "loss": 0.8463, "step": 115940 }, { "epoch": 2.0356747836162854, "grad_norm": 0.05055575743545241, "learning_rate": 0.00013305844639566766, "loss": 0.8475, "step": 115950 }, { "epoch": 2.035850348496287, "grad_norm": 0.06440967136082601, "learning_rate": 0.00013304790300355294, "loss": 0.8466, "step": 115960 }, { "epoch": 2.0360259133762884, "grad_norm": 0.0534708983795741, "learning_rate": 0.0001330373592053851, "loss": 0.8487, "step": 115970 }, { "epoch": 2.03620147825629, "grad_norm": 0.07710463290540008, "learning_rate": 0.00013302681500129782, "loss": 0.8386, "step": 115980 }, { "epoch": 2.036377043136291, "grad_norm": 0.06552634507337075, "learning_rate": 0.0001330162703914247, "loss": 0.8446, "step": 115990 }, { "epoch": 2.0365526080162923, "grad_norm": 0.05114791020433911, "learning_rate": 0.00013300572537589924, "loss": 0.8449, "step": 116000 }, { "epoch": 2.036728172896294, "grad_norm": 0.06631196863416539, "learning_rate": 0.00013299517995485512, "loss": 0.8479, "step": 116010 }, { "epoch": 2.0369037377762953, "grad_norm": 0.07016295146607304, "learning_rate": 0.00013298463412842596, "loss": 0.8493, "step": 116020 }, { "epoch": 2.0370793026562968, "grad_norm": 0.08126247624912136, "learning_rate": 0.0001329740878967453, "loss": 0.8469, "step": 116030 }, { "epoch": 2.0372548675362983, "grad_norm": 0.047751456064162326, "learning_rate": 0.00013296354125994685, "loss": 0.8482, "step": 116040 }, { "epoch": 2.0374304324162993, "grad_norm": 0.06139718837707965, "learning_rate": 0.00013295299421816421, "loss": 0.8519, "step": 116050 }, { "epoch": 2.0376059972963008, "grad_norm": 0.057790784702353666, "learning_rate": 0.00013294244677153105, "loss": 0.8426, "step": 116060 }, { "epoch": 2.0377815621763022, "grad_norm": 0.06385447157295256, "learning_rate": 0.00013293189892018086, "loss": 0.8419, "step": 116070 }, { "epoch": 2.0379571270563037, "grad_norm": 0.052351381233236446, "learning_rate": 0.0001329213506642475, "loss": 0.846, "step": 116080 }, { "epoch": 2.038132691936305, "grad_norm": 0.05053340485579395, "learning_rate": 0.00013291080200386444, "loss": 0.8421, "step": 116090 }, { "epoch": 2.0383082568163067, "grad_norm": 0.04618696882272358, "learning_rate": 0.00013290025293916547, "loss": 0.8449, "step": 116100 }, { "epoch": 2.0384838216963077, "grad_norm": 0.046102424827990804, "learning_rate": 0.00013288970347028413, "loss": 0.8395, "step": 116110 }, { "epoch": 2.038659386576309, "grad_norm": 0.05917354650635874, "learning_rate": 0.00013287915359735415, "loss": 0.8426, "step": 116120 }, { "epoch": 2.0388349514563107, "grad_norm": 0.05319090158827418, "learning_rate": 0.00013286860332050915, "loss": 0.8448, "step": 116130 }, { "epoch": 2.039010516336312, "grad_norm": 0.043565520765816095, "learning_rate": 0.00013285805263988287, "loss": 0.8531, "step": 116140 }, { "epoch": 2.0391860812163136, "grad_norm": 0.06266152075779538, "learning_rate": 0.00013284750155560893, "loss": 0.8527, "step": 116150 }, { "epoch": 2.039361646096315, "grad_norm": 0.05447159353044122, "learning_rate": 0.0001328369500678211, "loss": 0.8455, "step": 116160 }, { "epoch": 2.039537210976316, "grad_norm": 0.05460489749810612, "learning_rate": 0.00013282639817665298, "loss": 0.8416, "step": 116170 }, { "epoch": 2.0397127758563176, "grad_norm": 0.0660507308029412, "learning_rate": 0.0001328158458822383, "loss": 0.8476, "step": 116180 }, { "epoch": 2.039888340736319, "grad_norm": 0.07031905587848508, "learning_rate": 0.00013280529318471072, "loss": 0.8414, "step": 116190 }, { "epoch": 2.0400639056163206, "grad_norm": 0.07107260916300454, "learning_rate": 0.000132794740084204, "loss": 0.8501, "step": 116200 }, { "epoch": 2.040239470496322, "grad_norm": 0.055892353157972516, "learning_rate": 0.00013278418658085186, "loss": 0.8474, "step": 116210 }, { "epoch": 2.0404150353763235, "grad_norm": 0.05555582160874119, "learning_rate": 0.00013277363267478798, "loss": 0.835, "step": 116220 }, { "epoch": 2.0405906002563245, "grad_norm": 0.04455648663860785, "learning_rate": 0.00013276307836614608, "loss": 0.8621, "step": 116230 }, { "epoch": 2.040766165136326, "grad_norm": 0.10316197217055278, "learning_rate": 0.00013275252365505983, "loss": 0.8434, "step": 116240 }, { "epoch": 2.0409417300163275, "grad_norm": 0.057645345104377375, "learning_rate": 0.00013274196854166307, "loss": 0.8499, "step": 116250 }, { "epoch": 2.041117294896329, "grad_norm": 0.06191808016936995, "learning_rate": 0.0001327314130260895, "loss": 0.8421, "step": 116260 }, { "epoch": 2.0412928597763305, "grad_norm": 0.05573585853557654, "learning_rate": 0.00013272085710847284, "loss": 0.85, "step": 116270 }, { "epoch": 2.041468424656332, "grad_norm": 0.06133067029503345, "learning_rate": 0.00013271030078894683, "loss": 0.8478, "step": 116280 }, { "epoch": 2.041643989536333, "grad_norm": 0.05296574885081548, "learning_rate": 0.00013269974406764524, "loss": 0.8484, "step": 116290 }, { "epoch": 2.0418195544163344, "grad_norm": 0.05401344316572221, "learning_rate": 0.00013268918694470181, "loss": 0.8498, "step": 116300 }, { "epoch": 2.041995119296336, "grad_norm": 0.06569200524393778, "learning_rate": 0.0001326786294202503, "loss": 0.8473, "step": 116310 }, { "epoch": 2.0421706841763374, "grad_norm": 0.05916541825529002, "learning_rate": 0.0001326680714944245, "loss": 0.8494, "step": 116320 }, { "epoch": 2.042346249056339, "grad_norm": 0.057139597819307886, "learning_rate": 0.00013265751316735816, "loss": 0.8437, "step": 116330 }, { "epoch": 2.0425218139363404, "grad_norm": 0.04955425771372743, "learning_rate": 0.00013264695443918504, "loss": 0.8453, "step": 116340 }, { "epoch": 2.0426973788163414, "grad_norm": 0.04899030553920241, "learning_rate": 0.00013263639531003898, "loss": 0.8493, "step": 116350 }, { "epoch": 2.042872943696343, "grad_norm": 0.048653393954722284, "learning_rate": 0.00013262583578005368, "loss": 0.841, "step": 116360 }, { "epoch": 2.0430485085763443, "grad_norm": 0.05621177560871276, "learning_rate": 0.000132615275849363, "loss": 0.8521, "step": 116370 }, { "epoch": 2.043224073456346, "grad_norm": 0.07844356908001517, "learning_rate": 0.00013260471551810072, "loss": 0.8534, "step": 116380 }, { "epoch": 2.0433996383363473, "grad_norm": 0.07609001094324307, "learning_rate": 0.00013259415478640067, "loss": 0.8458, "step": 116390 }, { "epoch": 2.043575203216349, "grad_norm": 0.09192768634504529, "learning_rate": 0.00013258359365439658, "loss": 0.8503, "step": 116400 }, { "epoch": 2.04375076809635, "grad_norm": 0.07695274918346011, "learning_rate": 0.00013257303212222234, "loss": 0.8479, "step": 116410 }, { "epoch": 2.0439263329763513, "grad_norm": 0.06288379811974534, "learning_rate": 0.00013256247019001168, "loss": 0.8464, "step": 116420 }, { "epoch": 2.0441018978563528, "grad_norm": 0.04121207161087905, "learning_rate": 0.00013255190785789852, "loss": 0.8507, "step": 116430 }, { "epoch": 2.0442774627363542, "grad_norm": 0.04861349478423367, "learning_rate": 0.00013254134512601662, "loss": 0.8414, "step": 116440 }, { "epoch": 2.0444530276163557, "grad_norm": 0.06151609859613146, "learning_rate": 0.00013253078199449987, "loss": 0.8436, "step": 116450 }, { "epoch": 2.044628592496357, "grad_norm": 0.06083081639432223, "learning_rate": 0.00013252021846348202, "loss": 0.8499, "step": 116460 }, { "epoch": 2.0448041573763582, "grad_norm": 0.06300657262902802, "learning_rate": 0.000132509654533097, "loss": 0.8431, "step": 116470 }, { "epoch": 2.0449797222563597, "grad_norm": 0.054216995482307365, "learning_rate": 0.00013249909020347856, "loss": 0.8506, "step": 116480 }, { "epoch": 2.045155287136361, "grad_norm": 0.0643038824707139, "learning_rate": 0.00013248852547476065, "loss": 0.8369, "step": 116490 }, { "epoch": 2.0453308520163627, "grad_norm": 0.05644508359568222, "learning_rate": 0.0001324779603470771, "loss": 0.8473, "step": 116500 }, { "epoch": 2.045506416896364, "grad_norm": 0.05398078839871528, "learning_rate": 0.00013246739482056173, "loss": 0.8376, "step": 116510 }, { "epoch": 2.0456819817763656, "grad_norm": 0.051990521889777305, "learning_rate": 0.0001324568288953485, "loss": 0.8411, "step": 116520 }, { "epoch": 2.0458575466563667, "grad_norm": 0.08074660320320019, "learning_rate": 0.00013244626257157113, "loss": 0.85, "step": 116530 }, { "epoch": 2.046033111536368, "grad_norm": 0.06144587869852296, "learning_rate": 0.00013243569584936362, "loss": 0.8437, "step": 116540 }, { "epoch": 2.0462086764163696, "grad_norm": 0.06124286058685424, "learning_rate": 0.00013242512872885983, "loss": 0.8426, "step": 116550 }, { "epoch": 2.046384241296371, "grad_norm": 0.04826526630618794, "learning_rate": 0.00013241456121019366, "loss": 0.8481, "step": 116560 }, { "epoch": 2.0465598061763726, "grad_norm": 0.05625447415279807, "learning_rate": 0.00013240399329349895, "loss": 0.8478, "step": 116570 }, { "epoch": 2.046735371056374, "grad_norm": 0.04265975327780129, "learning_rate": 0.0001323934249789096, "loss": 0.8553, "step": 116580 }, { "epoch": 2.046910935936375, "grad_norm": 0.05077316213276536, "learning_rate": 0.00013238285626655956, "loss": 0.851, "step": 116590 }, { "epoch": 2.0470865008163766, "grad_norm": 0.06188178339830002, "learning_rate": 0.00013237228715658273, "loss": 0.8497, "step": 116600 }, { "epoch": 2.047262065696378, "grad_norm": 0.057509419825723405, "learning_rate": 0.00013236171764911302, "loss": 0.8448, "step": 116610 }, { "epoch": 2.0474376305763795, "grad_norm": 0.052022263716679305, "learning_rate": 0.00013235114774428434, "loss": 0.8516, "step": 116620 }, { "epoch": 2.047613195456381, "grad_norm": 0.09215195672826766, "learning_rate": 0.00013234057744223058, "loss": 0.8398, "step": 116630 }, { "epoch": 2.0477887603363825, "grad_norm": 0.05132276002732203, "learning_rate": 0.0001323300067430857, "loss": 0.8446, "step": 116640 }, { "epoch": 2.0479643252163835, "grad_norm": 0.06338349255615106, "learning_rate": 0.00013231943564698365, "loss": 0.8484, "step": 116650 }, { "epoch": 2.048139890096385, "grad_norm": 0.08014154596627829, "learning_rate": 0.0001323088641540583, "loss": 0.8425, "step": 116660 }, { "epoch": 2.0483154549763865, "grad_norm": 0.04420946453317965, "learning_rate": 0.0001322982922644437, "loss": 0.8482, "step": 116670 }, { "epoch": 2.048491019856388, "grad_norm": 0.05606991624814584, "learning_rate": 0.00013228771997827373, "loss": 0.8512, "step": 116680 }, { "epoch": 2.0486665847363894, "grad_norm": 0.05411675367312648, "learning_rate": 0.00013227714729568236, "loss": 0.8465, "step": 116690 }, { "epoch": 2.048842149616391, "grad_norm": 0.048158042863548425, "learning_rate": 0.0001322665742168035, "loss": 0.849, "step": 116700 }, { "epoch": 2.049017714496392, "grad_norm": 0.06056048117599479, "learning_rate": 0.00013225600074177112, "loss": 0.8585, "step": 116710 }, { "epoch": 2.0491932793763934, "grad_norm": 0.06830059597440585, "learning_rate": 0.0001322454268707193, "loss": 0.8454, "step": 116720 }, { "epoch": 2.049368844256395, "grad_norm": 0.05409421175982465, "learning_rate": 0.00013223485260378188, "loss": 0.8462, "step": 116730 }, { "epoch": 2.0495444091363964, "grad_norm": 0.04800039165187589, "learning_rate": 0.00013222427794109292, "loss": 0.8513, "step": 116740 }, { "epoch": 2.049719974016398, "grad_norm": 0.053660159594438504, "learning_rate": 0.00013221370288278637, "loss": 0.8557, "step": 116750 }, { "epoch": 2.0498955388963993, "grad_norm": 0.05242607072412516, "learning_rate": 0.0001322031274289962, "loss": 0.843, "step": 116760 }, { "epoch": 2.050071103776401, "grad_norm": 0.05094070408924866, "learning_rate": 0.0001321925515798565, "loss": 0.8457, "step": 116770 }, { "epoch": 2.050246668656402, "grad_norm": 0.06020719097212672, "learning_rate": 0.00013218197533550113, "loss": 0.8454, "step": 116780 }, { "epoch": 2.0504222335364033, "grad_norm": 0.0845442361807617, "learning_rate": 0.00013217139869606417, "loss": 0.8477, "step": 116790 }, { "epoch": 2.0505977984164048, "grad_norm": 0.0617076540077912, "learning_rate": 0.00013216082166167957, "loss": 0.8504, "step": 116800 }, { "epoch": 2.0507733632964062, "grad_norm": 0.07895446693153199, "learning_rate": 0.00013215024423248145, "loss": 0.8431, "step": 116810 }, { "epoch": 2.0509489281764077, "grad_norm": 0.05019213213758843, "learning_rate": 0.00013213966640860373, "loss": 0.8464, "step": 116820 }, { "epoch": 2.051124493056409, "grad_norm": 0.08107089982981344, "learning_rate": 0.00013212908819018048, "loss": 0.8429, "step": 116830 }, { "epoch": 2.0513000579364102, "grad_norm": 0.05832598153035355, "learning_rate": 0.00013211850957734576, "loss": 0.8434, "step": 116840 }, { "epoch": 2.0514756228164117, "grad_norm": 0.06625837677210851, "learning_rate": 0.0001321079305702335, "loss": 0.8464, "step": 116850 }, { "epoch": 2.051651187696413, "grad_norm": 0.04776299835760795, "learning_rate": 0.00013209735116897781, "loss": 0.8418, "step": 116860 }, { "epoch": 2.0518267525764147, "grad_norm": 0.06978582788693996, "learning_rate": 0.00013208677137371272, "loss": 0.8447, "step": 116870 }, { "epoch": 2.052002317456416, "grad_norm": 0.08732448344240398, "learning_rate": 0.00013207619118457228, "loss": 0.8429, "step": 116880 }, { "epoch": 2.0521778823364176, "grad_norm": 0.06254202261491476, "learning_rate": 0.00013206561060169052, "loss": 0.8442, "step": 116890 }, { "epoch": 2.0523534472164187, "grad_norm": 0.05285086836895094, "learning_rate": 0.0001320550296252016, "loss": 0.8441, "step": 116900 }, { "epoch": 2.05252901209642, "grad_norm": 0.0503620800079201, "learning_rate": 0.00013204444825523946, "loss": 0.85, "step": 116910 }, { "epoch": 2.0527045769764216, "grad_norm": 0.06008033063326506, "learning_rate": 0.0001320338664919382, "loss": 0.8453, "step": 116920 }, { "epoch": 2.052880141856423, "grad_norm": 0.050234672109880175, "learning_rate": 0.00013202328433543187, "loss": 0.8567, "step": 116930 }, { "epoch": 2.0530557067364246, "grad_norm": 0.05778919015963096, "learning_rate": 0.00013201270178585464, "loss": 0.8512, "step": 116940 }, { "epoch": 2.053231271616426, "grad_norm": 0.07014813109728066, "learning_rate": 0.00013200211884334054, "loss": 0.847, "step": 116950 }, { "epoch": 2.053406836496427, "grad_norm": 0.07730391193767756, "learning_rate": 0.00013199153550802362, "loss": 0.8497, "step": 116960 }, { "epoch": 2.0535824013764286, "grad_norm": 0.05135339323569785, "learning_rate": 0.00013198095178003797, "loss": 0.8448, "step": 116970 }, { "epoch": 2.05375796625643, "grad_norm": 0.08866676068461232, "learning_rate": 0.0001319703676595178, "loss": 0.8505, "step": 116980 }, { "epoch": 2.0539335311364315, "grad_norm": 0.051504735699953355, "learning_rate": 0.0001319597831465971, "loss": 0.8478, "step": 116990 }, { "epoch": 2.054109096016433, "grad_norm": 0.055775260413131494, "learning_rate": 0.00013194919824141, "loss": 0.8399, "step": 117000 }, { "epoch": 2.0542846608964345, "grad_norm": 0.06268497421104081, "learning_rate": 0.00013193861294409068, "loss": 0.8491, "step": 117010 }, { "epoch": 2.0544602257764355, "grad_norm": 0.0527786127006905, "learning_rate": 0.00013192802725477313, "loss": 0.8494, "step": 117020 }, { "epoch": 2.054635790656437, "grad_norm": 0.08092653836116273, "learning_rate": 0.0001319174411735916, "loss": 0.8397, "step": 117030 }, { "epoch": 2.0548113555364385, "grad_norm": 0.056513994499245664, "learning_rate": 0.00013190685470068012, "loss": 0.8456, "step": 117040 }, { "epoch": 2.05498692041644, "grad_norm": 0.04403965014468588, "learning_rate": 0.00013189626783617292, "loss": 0.8442, "step": 117050 }, { "epoch": 2.0551624852964414, "grad_norm": 0.048100450151004795, "learning_rate": 0.00013188568058020408, "loss": 0.8468, "step": 117060 }, { "epoch": 2.055338050176443, "grad_norm": 0.0687114888222507, "learning_rate": 0.0001318750929329077, "loss": 0.8513, "step": 117070 }, { "epoch": 2.055513615056444, "grad_norm": 0.05583709159471642, "learning_rate": 0.00013186450489441803, "loss": 0.8504, "step": 117080 }, { "epoch": 2.0556891799364454, "grad_norm": 0.06150920063138464, "learning_rate": 0.00013185391646486914, "loss": 0.8399, "step": 117090 }, { "epoch": 2.055864744816447, "grad_norm": 0.050187381075212915, "learning_rate": 0.0001318433276443952, "loss": 0.8482, "step": 117100 }, { "epoch": 2.0560403096964484, "grad_norm": 0.06081301717443002, "learning_rate": 0.00013183273843313042, "loss": 0.8437, "step": 117110 }, { "epoch": 2.05621587457645, "grad_norm": 0.05440428022774388, "learning_rate": 0.0001318221488312089, "loss": 0.852, "step": 117120 }, { "epoch": 2.0563914394564513, "grad_norm": 0.055174173248240366, "learning_rate": 0.00013181155883876488, "loss": 0.8451, "step": 117130 }, { "epoch": 2.0565670043364523, "grad_norm": 0.06182150727201478, "learning_rate": 0.00013180096845593248, "loss": 0.8374, "step": 117140 }, { "epoch": 2.056742569216454, "grad_norm": 0.059485033311463445, "learning_rate": 0.00013179037768284597, "loss": 0.8519, "step": 117150 }, { "epoch": 2.0569181340964553, "grad_norm": 0.05276210178432408, "learning_rate": 0.0001317797865196394, "loss": 0.8482, "step": 117160 }, { "epoch": 2.0570936989764568, "grad_norm": 0.04685972075628747, "learning_rate": 0.00013176919496644702, "loss": 0.8488, "step": 117170 }, { "epoch": 2.0572692638564583, "grad_norm": 0.06646078241010726, "learning_rate": 0.00013175860302340312, "loss": 0.8399, "step": 117180 }, { "epoch": 2.0574448287364597, "grad_norm": 0.09665695608295388, "learning_rate": 0.00013174801069064179, "loss": 0.8461, "step": 117190 }, { "epoch": 2.0576203936164608, "grad_norm": 0.06095798254836928, "learning_rate": 0.00013173741796829725, "loss": 0.8465, "step": 117200 }, { "epoch": 2.0577959584964622, "grad_norm": 0.08214773134898144, "learning_rate": 0.00013172682485650377, "loss": 0.8465, "step": 117210 }, { "epoch": 2.0579715233764637, "grad_norm": 0.04749023393663511, "learning_rate": 0.0001317162313553955, "loss": 0.841, "step": 117220 }, { "epoch": 2.058147088256465, "grad_norm": 0.06684910127792876, "learning_rate": 0.0001317056374651067, "loss": 0.8514, "step": 117230 }, { "epoch": 2.0583226531364667, "grad_norm": 0.06306280836075377, "learning_rate": 0.00013169504318577161, "loss": 0.8432, "step": 117240 }, { "epoch": 2.058498218016468, "grad_norm": 0.060018147719329715, "learning_rate": 0.00013168444851752444, "loss": 0.8498, "step": 117250 }, { "epoch": 2.058673782896469, "grad_norm": 0.04645489561283139, "learning_rate": 0.0001316738534604994, "loss": 0.8471, "step": 117260 }, { "epoch": 2.0588493477764707, "grad_norm": 0.05983014448167691, "learning_rate": 0.00013166325801483074, "loss": 0.8492, "step": 117270 }, { "epoch": 2.059024912656472, "grad_norm": 0.05836171615952909, "learning_rate": 0.00013165266218065276, "loss": 0.8495, "step": 117280 }, { "epoch": 2.0592004775364736, "grad_norm": 0.05628495813643304, "learning_rate": 0.0001316420659580997, "loss": 0.8551, "step": 117290 }, { "epoch": 2.059376042416475, "grad_norm": 0.04807645485968771, "learning_rate": 0.00013163146934730576, "loss": 0.8492, "step": 117300 }, { "epoch": 2.0595516072964766, "grad_norm": 0.08626536291749594, "learning_rate": 0.0001316208723484052, "loss": 0.8432, "step": 117310 }, { "epoch": 2.0597271721764776, "grad_norm": 0.053627977994988085, "learning_rate": 0.00013161027496153238, "loss": 0.8482, "step": 117320 }, { "epoch": 2.059902737056479, "grad_norm": 0.06905659020750213, "learning_rate": 0.00013159967718682144, "loss": 0.8438, "step": 117330 }, { "epoch": 2.0600783019364806, "grad_norm": 0.05698726476192725, "learning_rate": 0.00013158907902440677, "loss": 0.848, "step": 117340 }, { "epoch": 2.060253866816482, "grad_norm": 0.0644578509057911, "learning_rate": 0.00013157848047442262, "loss": 0.8442, "step": 117350 }, { "epoch": 2.0604294316964835, "grad_norm": 0.05535545499088577, "learning_rate": 0.00013156788153700325, "loss": 0.8449, "step": 117360 }, { "epoch": 2.060604996576485, "grad_norm": 0.07270484399188647, "learning_rate": 0.00013155728221228295, "loss": 0.8416, "step": 117370 }, { "epoch": 2.060780561456486, "grad_norm": 0.07362176247010538, "learning_rate": 0.00013154668250039603, "loss": 0.852, "step": 117380 }, { "epoch": 2.0609561263364875, "grad_norm": 0.06282169931919038, "learning_rate": 0.00013153608240147678, "loss": 0.8383, "step": 117390 }, { "epoch": 2.061131691216489, "grad_norm": 0.06238277698393166, "learning_rate": 0.00013152548191565957, "loss": 0.8477, "step": 117400 }, { "epoch": 2.0613072560964905, "grad_norm": 0.08803594550315069, "learning_rate": 0.0001315148810430786, "loss": 0.8528, "step": 117410 }, { "epoch": 2.061482820976492, "grad_norm": 0.06832259739252615, "learning_rate": 0.00013150427978386825, "loss": 0.8403, "step": 117420 }, { "epoch": 2.0616583858564934, "grad_norm": 0.05810143877432337, "learning_rate": 0.00013149367813816283, "loss": 0.8534, "step": 117430 }, { "epoch": 2.061833950736495, "grad_norm": 0.06232509240224349, "learning_rate": 0.00013148307610609666, "loss": 0.8458, "step": 117440 }, { "epoch": 2.062009515616496, "grad_norm": 0.047940697218336506, "learning_rate": 0.0001314724736878041, "loss": 0.8427, "step": 117450 }, { "epoch": 2.0621850804964974, "grad_norm": 0.04421746022950881, "learning_rate": 0.00013146187088341942, "loss": 0.8464, "step": 117460 }, { "epoch": 2.062360645376499, "grad_norm": 0.05316127648732322, "learning_rate": 0.00013145126769307703, "loss": 0.8548, "step": 117470 }, { "epoch": 2.0625362102565004, "grad_norm": 0.055929061941069474, "learning_rate": 0.00013144066411691123, "loss": 0.8498, "step": 117480 }, { "epoch": 2.062711775136502, "grad_norm": 0.05649436224928336, "learning_rate": 0.00013143006015505637, "loss": 0.8523, "step": 117490 }, { "epoch": 2.0628873400165033, "grad_norm": 0.05703313569068454, "learning_rate": 0.0001314194558076468, "loss": 0.8406, "step": 117500 }, { "epoch": 2.0630629048965043, "grad_norm": 0.06513264830100284, "learning_rate": 0.00013140885107481692, "loss": 0.8473, "step": 117510 }, { "epoch": 2.063238469776506, "grad_norm": 0.0659363644351434, "learning_rate": 0.0001313982459567011, "loss": 0.8442, "step": 117520 }, { "epoch": 2.0634140346565073, "grad_norm": 0.04387751325633889, "learning_rate": 0.00013138764045343365, "loss": 0.8425, "step": 117530 }, { "epoch": 2.063589599536509, "grad_norm": 0.07555991854982856, "learning_rate": 0.00013137703456514896, "loss": 0.8493, "step": 117540 }, { "epoch": 2.0637651644165103, "grad_norm": 0.09374892710684568, "learning_rate": 0.0001313664282919814, "loss": 0.8488, "step": 117550 }, { "epoch": 2.0639407292965117, "grad_norm": 0.06642892492447659, "learning_rate": 0.0001313558216340654, "loss": 0.853, "step": 117560 }, { "epoch": 2.0641162941765128, "grad_norm": 0.07090316859647429, "learning_rate": 0.00013134521459153532, "loss": 0.8443, "step": 117570 }, { "epoch": 2.0642918590565142, "grad_norm": 0.05603762961315771, "learning_rate": 0.0001313346071645256, "loss": 0.8371, "step": 117580 }, { "epoch": 2.0644674239365157, "grad_norm": 0.057858323781356205, "learning_rate": 0.00013132399935317056, "loss": 0.8472, "step": 117590 }, { "epoch": 2.064642988816517, "grad_norm": 0.07448642261942436, "learning_rate": 0.0001313133911576046, "loss": 0.8459, "step": 117600 }, { "epoch": 2.0648185536965187, "grad_norm": 0.0702654014668517, "learning_rate": 0.00013130278257796217, "loss": 0.8392, "step": 117610 }, { "epoch": 2.06499411857652, "grad_norm": 0.04775846753050002, "learning_rate": 0.00013129217361437773, "loss": 0.8448, "step": 117620 }, { "epoch": 2.065169683456521, "grad_norm": 0.05566351784856461, "learning_rate": 0.00013128156426698564, "loss": 0.8484, "step": 117630 }, { "epoch": 2.0653452483365227, "grad_norm": 0.04773423477891228, "learning_rate": 0.0001312709545359203, "loss": 0.848, "step": 117640 }, { "epoch": 2.065520813216524, "grad_norm": 0.055725062102700194, "learning_rate": 0.00013126034442131616, "loss": 0.8497, "step": 117650 }, { "epoch": 2.0656963780965256, "grad_norm": 0.05709561322861614, "learning_rate": 0.00013124973392330766, "loss": 0.8415, "step": 117660 }, { "epoch": 2.065871942976527, "grad_norm": 0.06911996911883099, "learning_rate": 0.00013123912304202924, "loss": 0.848, "step": 117670 }, { "epoch": 2.0660475078565286, "grad_norm": 0.06643285651695771, "learning_rate": 0.00013122851177761532, "loss": 0.849, "step": 117680 }, { "epoch": 2.0662230727365296, "grad_norm": 0.076268772533281, "learning_rate": 0.0001312179001302004, "loss": 0.8563, "step": 117690 }, { "epoch": 2.066398637616531, "grad_norm": 0.0522771140148735, "learning_rate": 0.00013120728809991886, "loss": 0.8485, "step": 117700 }, { "epoch": 2.0665742024965326, "grad_norm": 0.06906749270320302, "learning_rate": 0.0001311966756869052, "loss": 0.8474, "step": 117710 }, { "epoch": 2.066749767376534, "grad_norm": 0.06263984022231062, "learning_rate": 0.00013118606289129386, "loss": 0.8394, "step": 117720 }, { "epoch": 2.0669253322565355, "grad_norm": 0.05879904387961159, "learning_rate": 0.00013117544971321936, "loss": 0.8466, "step": 117730 }, { "epoch": 2.067100897136537, "grad_norm": 0.051766424652675454, "learning_rate": 0.00013116483615281606, "loss": 0.8504, "step": 117740 }, { "epoch": 2.067276462016538, "grad_norm": 0.06359597956084846, "learning_rate": 0.0001311542222102186, "loss": 0.8436, "step": 117750 }, { "epoch": 2.0674520268965395, "grad_norm": 0.05610101919326903, "learning_rate": 0.00013114360788556132, "loss": 0.8469, "step": 117760 }, { "epoch": 2.067627591776541, "grad_norm": 0.0534630064185257, "learning_rate": 0.0001311329931789787, "loss": 0.8446, "step": 117770 }, { "epoch": 2.0678031566565425, "grad_norm": 0.056290531078137356, "learning_rate": 0.00013112237809060533, "loss": 0.8424, "step": 117780 }, { "epoch": 2.067978721536544, "grad_norm": 0.049862569672064046, "learning_rate": 0.00013111176262057568, "loss": 0.8434, "step": 117790 }, { "epoch": 2.0681542864165454, "grad_norm": 0.04968111852191073, "learning_rate": 0.00013110114676902418, "loss": 0.8393, "step": 117800 }, { "epoch": 2.0683298512965465, "grad_norm": 0.0468386811004812, "learning_rate": 0.0001310905305360854, "loss": 0.8572, "step": 117810 }, { "epoch": 2.068505416176548, "grad_norm": 0.06506616279207952, "learning_rate": 0.00013107991392189382, "loss": 0.843, "step": 117820 }, { "epoch": 2.0686809810565494, "grad_norm": 0.04993473710354421, "learning_rate": 0.000131069296926584, "loss": 0.8431, "step": 117830 }, { "epoch": 2.068856545936551, "grad_norm": 0.06279240433223755, "learning_rate": 0.0001310586795502904, "loss": 0.8454, "step": 117840 }, { "epoch": 2.0690321108165524, "grad_norm": 0.053092119505256004, "learning_rate": 0.00013104806179314754, "loss": 0.8462, "step": 117850 }, { "epoch": 2.069207675696554, "grad_norm": 0.0609474927688844, "learning_rate": 0.00013103744365529005, "loss": 0.8447, "step": 117860 }, { "epoch": 2.069383240576555, "grad_norm": 0.04828540515928716, "learning_rate": 0.00013102682513685233, "loss": 0.8478, "step": 117870 }, { "epoch": 2.0695588054565563, "grad_norm": 0.06785773591237065, "learning_rate": 0.00013101620623796902, "loss": 0.8376, "step": 117880 }, { "epoch": 2.069734370336558, "grad_norm": 0.06572799022444999, "learning_rate": 0.00013100558695877462, "loss": 0.8534, "step": 117890 }, { "epoch": 2.0699099352165593, "grad_norm": 0.06381464243707702, "learning_rate": 0.0001309949672994036, "loss": 0.8366, "step": 117900 }, { "epoch": 2.070085500096561, "grad_norm": 0.05114644355809604, "learning_rate": 0.00013098434725999068, "loss": 0.8454, "step": 117910 }, { "epoch": 2.0702610649765623, "grad_norm": 0.051372027634937624, "learning_rate": 0.00013097372684067038, "loss": 0.8477, "step": 117920 }, { "epoch": 2.0704366298565633, "grad_norm": 0.06252157647542729, "learning_rate": 0.00013096310604157713, "loss": 0.841, "step": 117930 }, { "epoch": 2.0706121947365648, "grad_norm": 0.04968907396902904, "learning_rate": 0.00013095248486284558, "loss": 0.85, "step": 117940 }, { "epoch": 2.0707877596165662, "grad_norm": 0.06193185325779506, "learning_rate": 0.00013094186330461037, "loss": 0.8436, "step": 117950 }, { "epoch": 2.0709633244965677, "grad_norm": 0.04979606195755423, "learning_rate": 0.00013093124136700596, "loss": 0.8447, "step": 117960 }, { "epoch": 2.071138889376569, "grad_norm": 0.07014103113125919, "learning_rate": 0.00013092061905016703, "loss": 0.8517, "step": 117970 }, { "epoch": 2.0713144542565707, "grad_norm": 0.08426600380381069, "learning_rate": 0.0001309099963542281, "loss": 0.8497, "step": 117980 }, { "epoch": 2.0714900191365717, "grad_norm": 0.07930857408223287, "learning_rate": 0.00013089937327932376, "loss": 0.8454, "step": 117990 }, { "epoch": 2.071665584016573, "grad_norm": 0.05583155242913865, "learning_rate": 0.00013088874982558868, "loss": 0.8408, "step": 118000 }, { "epoch": 2.0718411488965747, "grad_norm": 0.05376811591804228, "learning_rate": 0.00013087812599315736, "loss": 0.838, "step": 118010 }, { "epoch": 2.072016713776576, "grad_norm": 0.06899213811454095, "learning_rate": 0.00013086750178216446, "loss": 0.8468, "step": 118020 }, { "epoch": 2.0721922786565776, "grad_norm": 0.07197068911579907, "learning_rate": 0.00013085687719274465, "loss": 0.8373, "step": 118030 }, { "epoch": 2.072367843536579, "grad_norm": 0.04939052749696648, "learning_rate": 0.00013084625222503242, "loss": 0.8434, "step": 118040 }, { "epoch": 2.07254340841658, "grad_norm": 0.05966030372195348, "learning_rate": 0.00013083562687916247, "loss": 0.8467, "step": 118050 }, { "epoch": 2.0727189732965816, "grad_norm": 0.04853340362247222, "learning_rate": 0.00013082500115526942, "loss": 0.8461, "step": 118060 }, { "epoch": 2.072894538176583, "grad_norm": 0.056299221822315025, "learning_rate": 0.0001308143750534879, "loss": 0.846, "step": 118070 }, { "epoch": 2.0730701030565846, "grad_norm": 0.05473082342763264, "learning_rate": 0.00013080374857395253, "loss": 0.8433, "step": 118080 }, { "epoch": 2.073245667936586, "grad_norm": 0.05647424957894315, "learning_rate": 0.00013079312171679794, "loss": 0.8512, "step": 118090 }, { "epoch": 2.0734212328165875, "grad_norm": 0.07209340114511861, "learning_rate": 0.0001307824944821588, "loss": 0.845, "step": 118100 }, { "epoch": 2.073596797696589, "grad_norm": 0.06360679555317258, "learning_rate": 0.00013077186687016973, "loss": 0.8485, "step": 118110 }, { "epoch": 2.07377236257659, "grad_norm": 0.06348647797796021, "learning_rate": 0.00013076123888096537, "loss": 0.849, "step": 118120 }, { "epoch": 2.0739479274565915, "grad_norm": 0.045860652147728935, "learning_rate": 0.00013075061051468048, "loss": 0.8409, "step": 118130 }, { "epoch": 2.074123492336593, "grad_norm": 0.05852068411308971, "learning_rate": 0.00013073998177144962, "loss": 0.8461, "step": 118140 }, { "epoch": 2.0742990572165945, "grad_norm": 0.06830147591800605, "learning_rate": 0.00013072935265140753, "loss": 0.8437, "step": 118150 }, { "epoch": 2.074474622096596, "grad_norm": 0.06672198765029379, "learning_rate": 0.0001307187231546888, "loss": 0.8494, "step": 118160 }, { "epoch": 2.074650186976597, "grad_norm": 0.06288729578773182, "learning_rate": 0.00013070809328142815, "loss": 0.8452, "step": 118170 }, { "epoch": 2.0748257518565985, "grad_norm": 0.07343908060771329, "learning_rate": 0.00013069746303176027, "loss": 0.8503, "step": 118180 }, { "epoch": 2.0750013167366, "grad_norm": 0.04944791184807578, "learning_rate": 0.00013068683240581984, "loss": 0.8413, "step": 118190 }, { "epoch": 2.0751768816166014, "grad_norm": 0.05589806142155868, "learning_rate": 0.0001306762014037416, "loss": 0.8473, "step": 118200 }, { "epoch": 2.075352446496603, "grad_norm": 0.05085676597573957, "learning_rate": 0.00013066557002566016, "loss": 0.844, "step": 118210 }, { "epoch": 2.0755280113766044, "grad_norm": 0.06695396334319004, "learning_rate": 0.00013065493827171026, "loss": 0.8525, "step": 118220 }, { "epoch": 2.075703576256606, "grad_norm": 0.06160484016234548, "learning_rate": 0.0001306443061420266, "loss": 0.8482, "step": 118230 }, { "epoch": 2.075879141136607, "grad_norm": 0.05013514532133713, "learning_rate": 0.0001306336736367439, "loss": 0.8549, "step": 118240 }, { "epoch": 2.0760547060166084, "grad_norm": 0.09175940893952178, "learning_rate": 0.0001306230407559969, "loss": 0.8505, "step": 118250 }, { "epoch": 2.07623027089661, "grad_norm": 0.05997439304405328, "learning_rate": 0.00013061240749992028, "loss": 0.8428, "step": 118260 }, { "epoch": 2.0764058357766113, "grad_norm": 0.04738890772238235, "learning_rate": 0.00013060177386864881, "loss": 0.848, "step": 118270 }, { "epoch": 2.076581400656613, "grad_norm": 0.04170677429624133, "learning_rate": 0.00013059113986231715, "loss": 0.8442, "step": 118280 }, { "epoch": 2.0767569655366143, "grad_norm": 0.05423571512654899, "learning_rate": 0.00013058050548106007, "loss": 0.8384, "step": 118290 }, { "epoch": 2.0769325304166153, "grad_norm": 0.05725474898382126, "learning_rate": 0.00013056987072501236, "loss": 0.8445, "step": 118300 }, { "epoch": 2.0771080952966168, "grad_norm": 0.05611621702683366, "learning_rate": 0.00013055923559430872, "loss": 0.8507, "step": 118310 }, { "epoch": 2.0772836601766183, "grad_norm": 0.07002228811568009, "learning_rate": 0.00013054860008908386, "loss": 0.8545, "step": 118320 }, { "epoch": 2.0774592250566197, "grad_norm": 0.04908871339609314, "learning_rate": 0.00013053796420947261, "loss": 0.8425, "step": 118330 }, { "epoch": 2.077634789936621, "grad_norm": 0.04695805276124948, "learning_rate": 0.00013052732795560968, "loss": 0.8457, "step": 118340 }, { "epoch": 2.0778103548166227, "grad_norm": 0.048136985618847015, "learning_rate": 0.00013051669132762984, "loss": 0.839, "step": 118350 }, { "epoch": 2.0779859196966237, "grad_norm": 0.04672687878391958, "learning_rate": 0.00013050605432566787, "loss": 0.844, "step": 118360 }, { "epoch": 2.078161484576625, "grad_norm": 0.07708039680252794, "learning_rate": 0.00013049541694985854, "loss": 0.8465, "step": 118370 }, { "epoch": 2.0783370494566267, "grad_norm": 0.05080157488052207, "learning_rate": 0.00013048477920033662, "loss": 0.8457, "step": 118380 }, { "epoch": 2.078512614336628, "grad_norm": 0.055787916595614266, "learning_rate": 0.0001304741410772369, "loss": 0.8425, "step": 118390 }, { "epoch": 2.0786881792166296, "grad_norm": 0.05644821785070292, "learning_rate": 0.00013046350258069417, "loss": 0.8541, "step": 118400 }, { "epoch": 2.078863744096631, "grad_norm": 0.07410387094627895, "learning_rate": 0.00013045286371084317, "loss": 0.8443, "step": 118410 }, { "epoch": 2.079039308976632, "grad_norm": 0.054601319248659276, "learning_rate": 0.0001304422244678188, "loss": 0.8509, "step": 118420 }, { "epoch": 2.0792148738566336, "grad_norm": 0.07200270745868438, "learning_rate": 0.0001304315848517558, "loss": 0.8518, "step": 118430 }, { "epoch": 2.079390438736635, "grad_norm": 0.06647522303922543, "learning_rate": 0.00013042094486278896, "loss": 0.8512, "step": 118440 }, { "epoch": 2.0795660036166366, "grad_norm": 0.05419612894348638, "learning_rate": 0.0001304103045010531, "loss": 0.852, "step": 118450 }, { "epoch": 2.079741568496638, "grad_norm": 0.05746220263040244, "learning_rate": 0.00013039966376668307, "loss": 0.8484, "step": 118460 }, { "epoch": 2.0799171333766395, "grad_norm": 0.06417366374717477, "learning_rate": 0.00013038902265981366, "loss": 0.8498, "step": 118470 }, { "epoch": 2.0800926982566406, "grad_norm": 0.06874477077958871, "learning_rate": 0.0001303783811805797, "loss": 0.8392, "step": 118480 }, { "epoch": 2.080268263136642, "grad_norm": 0.04878451626997051, "learning_rate": 0.00013036773932911602, "loss": 0.8419, "step": 118490 }, { "epoch": 2.0804438280166435, "grad_norm": 0.05295461265971048, "learning_rate": 0.00013035709710555743, "loss": 0.8478, "step": 118500 }, { "epoch": 2.080619392896645, "grad_norm": 0.05566465215360677, "learning_rate": 0.00013034645451003883, "loss": 0.8508, "step": 118510 }, { "epoch": 2.0807949577766465, "grad_norm": 0.055895124126773825, "learning_rate": 0.00013033581154269497, "loss": 0.8415, "step": 118520 }, { "epoch": 2.080970522656648, "grad_norm": 0.07352915255953853, "learning_rate": 0.0001303251682036608, "loss": 0.8416, "step": 118530 }, { "epoch": 2.081146087536649, "grad_norm": 0.05644313845276074, "learning_rate": 0.00013031452449307115, "loss": 0.8486, "step": 118540 }, { "epoch": 2.0813216524166505, "grad_norm": 0.05789895821678486, "learning_rate": 0.0001303038804110608, "loss": 0.854, "step": 118550 }, { "epoch": 2.081497217296652, "grad_norm": 0.04451223502273039, "learning_rate": 0.00013029323595776471, "loss": 0.8455, "step": 118560 }, { "epoch": 2.0816727821766534, "grad_norm": 0.0472715382705794, "learning_rate": 0.00013028259113331768, "loss": 0.8471, "step": 118570 }, { "epoch": 2.081848347056655, "grad_norm": 0.06057419246399166, "learning_rate": 0.00013027194593785463, "loss": 0.8472, "step": 118580 }, { "epoch": 2.0820239119366564, "grad_norm": 0.038454005591746665, "learning_rate": 0.0001302613003715104, "loss": 0.8512, "step": 118590 }, { "epoch": 2.0821994768166574, "grad_norm": 0.0523775493078008, "learning_rate": 0.00013025065443441993, "loss": 0.8499, "step": 118600 }, { "epoch": 2.082375041696659, "grad_norm": 0.06044270087472284, "learning_rate": 0.00013024000812671805, "loss": 0.8483, "step": 118610 }, { "epoch": 2.0825506065766604, "grad_norm": 0.0444396072175837, "learning_rate": 0.00013022936144853963, "loss": 0.8388, "step": 118620 }, { "epoch": 2.082726171456662, "grad_norm": 0.0523506241054172, "learning_rate": 0.00013021871440001962, "loss": 0.8507, "step": 118630 }, { "epoch": 2.0829017363366633, "grad_norm": 0.047146156806259346, "learning_rate": 0.00013020806698129286, "loss": 0.8468, "step": 118640 }, { "epoch": 2.083077301216665, "grad_norm": 0.05945772526434581, "learning_rate": 0.00013019741919249433, "loss": 0.8419, "step": 118650 }, { "epoch": 2.083252866096666, "grad_norm": 0.06684334870738119, "learning_rate": 0.0001301867710337589, "loss": 0.8392, "step": 118660 }, { "epoch": 2.0834284309766673, "grad_norm": 0.07477604644118806, "learning_rate": 0.00013017612250522148, "loss": 0.8474, "step": 118670 }, { "epoch": 2.083603995856669, "grad_norm": 0.06423417350365533, "learning_rate": 0.000130165473607017, "loss": 0.8414, "step": 118680 }, { "epoch": 2.0837795607366703, "grad_norm": 0.0536944698029669, "learning_rate": 0.00013015482433928038, "loss": 0.8448, "step": 118690 }, { "epoch": 2.0839551256166717, "grad_norm": 0.04551269512970482, "learning_rate": 0.0001301441747021465, "loss": 0.8483, "step": 118700 }, { "epoch": 2.084130690496673, "grad_norm": 0.060944129837585306, "learning_rate": 0.0001301335246957504, "loss": 0.8496, "step": 118710 }, { "epoch": 2.0843062553766742, "grad_norm": 0.051701522725089225, "learning_rate": 0.000130122874320227, "loss": 0.8486, "step": 118720 }, { "epoch": 2.0844818202566757, "grad_norm": 0.06206857779416364, "learning_rate": 0.00013011222357571113, "loss": 0.8481, "step": 118730 }, { "epoch": 2.084657385136677, "grad_norm": 0.06482821548729743, "learning_rate": 0.0001301015724623378, "loss": 0.8447, "step": 118740 }, { "epoch": 2.0848329500166787, "grad_norm": 0.05120482776346574, "learning_rate": 0.000130090920980242, "loss": 0.8442, "step": 118750 }, { "epoch": 2.08500851489668, "grad_norm": 0.05424320640413045, "learning_rate": 0.00013008026912955867, "loss": 0.8449, "step": 118760 }, { "epoch": 2.0851840797766816, "grad_norm": 0.06726648813085498, "learning_rate": 0.00013006961691042272, "loss": 0.8499, "step": 118770 }, { "epoch": 2.0853596446566827, "grad_norm": 0.06255846654018957, "learning_rate": 0.0001300589643229692, "loss": 0.8449, "step": 118780 }, { "epoch": 2.085535209536684, "grad_norm": 0.04957427466986319, "learning_rate": 0.000130048311367333, "loss": 0.8395, "step": 118790 }, { "epoch": 2.0857107744166856, "grad_norm": 0.04741331489984336, "learning_rate": 0.00013003765804364914, "loss": 0.8403, "step": 118800 }, { "epoch": 2.085886339296687, "grad_norm": 0.059287024615710934, "learning_rate": 0.00013002700435205255, "loss": 0.8475, "step": 118810 }, { "epoch": 2.0860619041766886, "grad_norm": 0.05298919594229593, "learning_rate": 0.0001300163502926783, "loss": 0.8437, "step": 118820 }, { "epoch": 2.08623746905669, "grad_norm": 0.058677960794733704, "learning_rate": 0.0001300056958656613, "loss": 0.8421, "step": 118830 }, { "epoch": 2.086413033936691, "grad_norm": 0.0899647270889374, "learning_rate": 0.0001299950410711366, "loss": 0.8478, "step": 118840 }, { "epoch": 2.0865885988166926, "grad_norm": 0.05651325758564085, "learning_rate": 0.00012998438590923917, "loss": 0.8426, "step": 118850 }, { "epoch": 2.086764163696694, "grad_norm": 0.047521045145038174, "learning_rate": 0.00012997373038010397, "loss": 0.8504, "step": 118860 }, { "epoch": 2.0869397285766955, "grad_norm": 0.055074486689884156, "learning_rate": 0.00012996307448386607, "loss": 0.8387, "step": 118870 }, { "epoch": 2.087115293456697, "grad_norm": 0.05790227964044634, "learning_rate": 0.00012995241822066054, "loss": 0.8542, "step": 118880 }, { "epoch": 2.0872908583366985, "grad_norm": 0.04792861610532143, "learning_rate": 0.00012994176159062223, "loss": 0.8466, "step": 118890 }, { "epoch": 2.0874664232167, "grad_norm": 0.05088836514725931, "learning_rate": 0.0001299311045938863, "loss": 0.8433, "step": 118900 }, { "epoch": 2.087641988096701, "grad_norm": 0.058928982018121856, "learning_rate": 0.00012992044723058772, "loss": 0.8456, "step": 118910 }, { "epoch": 2.0878175529767025, "grad_norm": 0.14996525471979177, "learning_rate": 0.0001299097895008615, "loss": 0.8356, "step": 118920 }, { "epoch": 2.087993117856704, "grad_norm": 0.057527029766592915, "learning_rate": 0.00012989913140484276, "loss": 0.8467, "step": 118930 }, { "epoch": 2.0881686827367054, "grad_norm": 0.062479208861357834, "learning_rate": 0.0001298884729426665, "loss": 0.8517, "step": 118940 }, { "epoch": 2.088344247616707, "grad_norm": 0.0598885970714651, "learning_rate": 0.0001298778141144677, "loss": 0.8479, "step": 118950 }, { "epoch": 2.0885198124967084, "grad_norm": 0.047131678062513546, "learning_rate": 0.00012986715492038146, "loss": 0.8389, "step": 118960 }, { "epoch": 2.0886953773767094, "grad_norm": 0.05034395564671304, "learning_rate": 0.00012985649536054287, "loss": 0.8494, "step": 118970 }, { "epoch": 2.088870942256711, "grad_norm": 0.08800170177354992, "learning_rate": 0.00012984583543508693, "loss": 0.8469, "step": 118980 }, { "epoch": 2.0890465071367124, "grad_norm": 0.0813289232223802, "learning_rate": 0.00012983517514414874, "loss": 0.84, "step": 118990 }, { "epoch": 2.089222072016714, "grad_norm": 0.07425434050543239, "learning_rate": 0.00012982451448786335, "loss": 0.8505, "step": 119000 }, { "epoch": 2.0893976368967153, "grad_norm": 0.08887626486826802, "learning_rate": 0.0001298138534663658, "loss": 0.8415, "step": 119010 }, { "epoch": 2.089573201776717, "grad_norm": 0.05114025308548986, "learning_rate": 0.00012980319207979126, "loss": 0.8433, "step": 119020 }, { "epoch": 2.089748766656718, "grad_norm": 0.05989045765072727, "learning_rate": 0.00012979253032827472, "loss": 0.8432, "step": 119030 }, { "epoch": 2.0899243315367193, "grad_norm": 0.06750992564981907, "learning_rate": 0.00012978186821195132, "loss": 0.8462, "step": 119040 }, { "epoch": 2.090099896416721, "grad_norm": 0.043925868510633265, "learning_rate": 0.00012977120573095616, "loss": 0.8437, "step": 119050 }, { "epoch": 2.0902754612967223, "grad_norm": 0.058341948424190555, "learning_rate": 0.00012976054288542426, "loss": 0.8489, "step": 119060 }, { "epoch": 2.0904510261767237, "grad_norm": 0.043171138671992675, "learning_rate": 0.00012974987967549078, "loss": 0.8453, "step": 119070 }, { "epoch": 2.090626591056725, "grad_norm": 0.05530716073458449, "learning_rate": 0.00012973921610129083, "loss": 0.8439, "step": 119080 }, { "epoch": 2.0908021559367262, "grad_norm": 0.0598933000504145, "learning_rate": 0.0001297285521629595, "loss": 0.8443, "step": 119090 }, { "epoch": 2.0909777208167277, "grad_norm": 0.05296229222252606, "learning_rate": 0.0001297178878606319, "loss": 0.8435, "step": 119100 }, { "epoch": 2.091153285696729, "grad_norm": 0.06317692273949202, "learning_rate": 0.00012970722319444316, "loss": 0.849, "step": 119110 }, { "epoch": 2.0913288505767307, "grad_norm": 0.05176946521246532, "learning_rate": 0.0001296965581645284, "loss": 0.8403, "step": 119120 }, { "epoch": 2.091504415456732, "grad_norm": 0.05479756542991588, "learning_rate": 0.00012968589277102276, "loss": 0.8527, "step": 119130 }, { "epoch": 2.0916799803367336, "grad_norm": 0.048594726292483126, "learning_rate": 0.00012967522701406135, "loss": 0.8507, "step": 119140 }, { "epoch": 2.0918555452167347, "grad_norm": 0.04464203382567726, "learning_rate": 0.00012966456089377932, "loss": 0.8554, "step": 119150 }, { "epoch": 2.092031110096736, "grad_norm": 0.0501921797257226, "learning_rate": 0.00012965389441031182, "loss": 0.8475, "step": 119160 }, { "epoch": 2.0922066749767376, "grad_norm": 0.08725694317022412, "learning_rate": 0.00012964322756379399, "loss": 0.8396, "step": 119170 }, { "epoch": 2.092382239856739, "grad_norm": 0.07343568388306458, "learning_rate": 0.00012963256035436094, "loss": 0.8492, "step": 119180 }, { "epoch": 2.0925578047367406, "grad_norm": 0.05239381402519818, "learning_rate": 0.00012962189278214792, "loss": 0.8422, "step": 119190 }, { "epoch": 2.092733369616742, "grad_norm": 0.05894281284536644, "learning_rate": 0.00012961122484728995, "loss": 0.844, "step": 119200 }, { "epoch": 2.092908934496743, "grad_norm": 0.060772315019222214, "learning_rate": 0.00012960055654992234, "loss": 0.8534, "step": 119210 }, { "epoch": 2.0930844993767446, "grad_norm": 0.07340050083328287, "learning_rate": 0.00012958988789018024, "loss": 0.8413, "step": 119220 }, { "epoch": 2.093260064256746, "grad_norm": 0.054145156728760746, "learning_rate": 0.0001295792188681987, "loss": 0.8448, "step": 119230 }, { "epoch": 2.0934356291367475, "grad_norm": 0.06329412344500263, "learning_rate": 0.000129568549484113, "loss": 0.8475, "step": 119240 }, { "epoch": 2.093611194016749, "grad_norm": 0.06025345153675746, "learning_rate": 0.00012955787973805834, "loss": 0.8436, "step": 119250 }, { "epoch": 2.0937867588967505, "grad_norm": 0.06009372035563773, "learning_rate": 0.00012954720963016976, "loss": 0.8387, "step": 119260 }, { "epoch": 2.0939623237767515, "grad_norm": 0.057737614941348686, "learning_rate": 0.0001295365391605827, "loss": 0.8502, "step": 119270 }, { "epoch": 2.094137888656753, "grad_norm": 0.052606189309477906, "learning_rate": 0.00012952586832943217, "loss": 0.8405, "step": 119280 }, { "epoch": 2.0943134535367545, "grad_norm": 0.06512257055977859, "learning_rate": 0.00012951519713685341, "loss": 0.8416, "step": 119290 }, { "epoch": 2.094489018416756, "grad_norm": 0.06261862558368836, "learning_rate": 0.00012950452558298166, "loss": 0.8384, "step": 119300 }, { "epoch": 2.0946645832967574, "grad_norm": 0.07478950630296173, "learning_rate": 0.00012949385366795206, "loss": 0.8428, "step": 119310 }, { "epoch": 2.094840148176759, "grad_norm": 0.04501536766710995, "learning_rate": 0.0001294831813918999, "loss": 0.8411, "step": 119320 }, { "epoch": 2.09501571305676, "grad_norm": 0.05209167569663294, "learning_rate": 0.00012947250875496038, "loss": 0.8384, "step": 119330 }, { "epoch": 2.0951912779367614, "grad_norm": 0.05954363979486522, "learning_rate": 0.00012946183575726874, "loss": 0.8419, "step": 119340 }, { "epoch": 2.095366842816763, "grad_norm": 0.05540306952595872, "learning_rate": 0.00012945116239896013, "loss": 0.8461, "step": 119350 }, { "epoch": 2.0955424076967644, "grad_norm": 0.055658991680472454, "learning_rate": 0.00012944048868016988, "loss": 0.8411, "step": 119360 }, { "epoch": 2.095717972576766, "grad_norm": 0.07415580308536401, "learning_rate": 0.0001294298146010332, "loss": 0.8457, "step": 119370 }, { "epoch": 2.0958935374567673, "grad_norm": 0.05875727495888005, "learning_rate": 0.00012941914016168527, "loss": 0.8493, "step": 119380 }, { "epoch": 2.0960691023367684, "grad_norm": 0.05849005890618339, "learning_rate": 0.00012940846536226144, "loss": 0.8494, "step": 119390 }, { "epoch": 2.09624466721677, "grad_norm": 0.06283955390024872, "learning_rate": 0.00012939779020289686, "loss": 0.8439, "step": 119400 }, { "epoch": 2.0964202320967713, "grad_norm": 0.051115569405675625, "learning_rate": 0.00012938711468372688, "loss": 0.8412, "step": 119410 }, { "epoch": 2.096595796976773, "grad_norm": 0.06397583207457816, "learning_rate": 0.0001293764388048867, "loss": 0.8498, "step": 119420 }, { "epoch": 2.0967713618567743, "grad_norm": 0.06577406594790697, "learning_rate": 0.00012936576256651156, "loss": 0.8449, "step": 119430 }, { "epoch": 2.0969469267367757, "grad_norm": 0.060924461291992436, "learning_rate": 0.00012935508596873685, "loss": 0.8528, "step": 119440 }, { "epoch": 2.0971224916167768, "grad_norm": 0.054585407820245097, "learning_rate": 0.0001293444090116977, "loss": 0.8496, "step": 119450 }, { "epoch": 2.0972980564967783, "grad_norm": 0.049489830812382354, "learning_rate": 0.00012933373169552953, "loss": 0.8443, "step": 119460 }, { "epoch": 2.0974736213767797, "grad_norm": 0.047651022594085574, "learning_rate": 0.00012932305402036745, "loss": 0.8477, "step": 119470 }, { "epoch": 2.097649186256781, "grad_norm": 0.044677718240568154, "learning_rate": 0.00012931237598634693, "loss": 0.8484, "step": 119480 }, { "epoch": 2.0978247511367827, "grad_norm": 0.04985374384716653, "learning_rate": 0.00012930169759360312, "loss": 0.8501, "step": 119490 }, { "epoch": 2.098000316016784, "grad_norm": 0.06536676565661524, "learning_rate": 0.00012929101884227143, "loss": 0.8477, "step": 119500 }, { "epoch": 2.098175880896785, "grad_norm": 0.05227888575025538, "learning_rate": 0.0001292803397324871, "loss": 0.8443, "step": 119510 }, { "epoch": 2.0983514457767867, "grad_norm": 0.05741158065631572, "learning_rate": 0.00012926966026438543, "loss": 0.8446, "step": 119520 }, { "epoch": 2.098527010656788, "grad_norm": 0.0555165870991368, "learning_rate": 0.00012925898043810173, "loss": 0.8456, "step": 119530 }, { "epoch": 2.0987025755367896, "grad_norm": 0.06065128829062834, "learning_rate": 0.00012924830025377136, "loss": 0.8422, "step": 119540 }, { "epoch": 2.098878140416791, "grad_norm": 0.056264711310855905, "learning_rate": 0.00012923761971152958, "loss": 0.8474, "step": 119550 }, { "epoch": 2.0990537052967926, "grad_norm": 0.07159646306618508, "learning_rate": 0.00012922693881151182, "loss": 0.8407, "step": 119560 }, { "epoch": 2.099229270176794, "grad_norm": 0.05803496818593126, "learning_rate": 0.00012921625755385328, "loss": 0.8448, "step": 119570 }, { "epoch": 2.099404835056795, "grad_norm": 0.05559012646563602, "learning_rate": 0.00012920557593868934, "loss": 0.8489, "step": 119580 }, { "epoch": 2.0995803999367966, "grad_norm": 0.06787745363183025, "learning_rate": 0.00012919489396615537, "loss": 0.8429, "step": 119590 }, { "epoch": 2.099755964816798, "grad_norm": 0.06769318875925688, "learning_rate": 0.00012918421163638665, "loss": 0.8452, "step": 119600 }, { "epoch": 2.0999315296967995, "grad_norm": 0.04795884508436189, "learning_rate": 0.00012917352894951863, "loss": 0.8416, "step": 119610 }, { "epoch": 2.100107094576801, "grad_norm": 0.055808325945215106, "learning_rate": 0.00012916284590568656, "loss": 0.8491, "step": 119620 }, { "epoch": 2.100282659456802, "grad_norm": 0.06505995974052305, "learning_rate": 0.00012915216250502587, "loss": 0.8484, "step": 119630 }, { "epoch": 2.1004582243368035, "grad_norm": 0.05415543982621401, "learning_rate": 0.00012914147874767184, "loss": 0.8426, "step": 119640 }, { "epoch": 2.100633789216805, "grad_norm": 0.056809176685881965, "learning_rate": 0.00012913079463375992, "loss": 0.842, "step": 119650 }, { "epoch": 2.1008093540968065, "grad_norm": 0.06017411354021387, "learning_rate": 0.00012912011016342538, "loss": 0.8459, "step": 119660 }, { "epoch": 2.100984918976808, "grad_norm": 0.05263617757758233, "learning_rate": 0.00012910942533680373, "loss": 0.8435, "step": 119670 }, { "epoch": 2.1011604838568094, "grad_norm": 0.055137533743825996, "learning_rate": 0.00012909874015403023, "loss": 0.8556, "step": 119680 }, { "epoch": 2.101336048736811, "grad_norm": 0.0640425422246378, "learning_rate": 0.00012908805461524033, "loss": 0.8582, "step": 119690 }, { "epoch": 2.101511613616812, "grad_norm": 0.06329322398041959, "learning_rate": 0.00012907736872056936, "loss": 0.842, "step": 119700 }, { "epoch": 2.1016871784968134, "grad_norm": 0.0776990387663314, "learning_rate": 0.00012906668247015277, "loss": 0.8421, "step": 119710 }, { "epoch": 2.101862743376815, "grad_norm": 0.061480815767956054, "learning_rate": 0.00012905599586412594, "loss": 0.8422, "step": 119720 }, { "epoch": 2.1020383082568164, "grad_norm": 0.052681379775825786, "learning_rate": 0.0001290453089026243, "loss": 0.8499, "step": 119730 }, { "epoch": 2.102213873136818, "grad_norm": 0.07958251706693292, "learning_rate": 0.00012903462158578316, "loss": 0.8479, "step": 119740 }, { "epoch": 2.1023894380168193, "grad_norm": 0.07647845522021782, "learning_rate": 0.00012902393391373798, "loss": 0.8441, "step": 119750 }, { "epoch": 2.1025650028968204, "grad_norm": 0.060657759464783294, "learning_rate": 0.00012901324588662422, "loss": 0.8465, "step": 119760 }, { "epoch": 2.102740567776822, "grad_norm": 0.04467368215676108, "learning_rate": 0.00012900255750457725, "loss": 0.8415, "step": 119770 }, { "epoch": 2.1029161326568233, "grad_norm": 0.09038001991499593, "learning_rate": 0.00012899186876773252, "loss": 0.8505, "step": 119780 }, { "epoch": 2.103091697536825, "grad_norm": 0.05781097887935857, "learning_rate": 0.00012898117967622547, "loss": 0.8414, "step": 119790 }, { "epoch": 2.1032672624168263, "grad_norm": 0.07698853382539673, "learning_rate": 0.0001289704902301915, "loss": 0.8497, "step": 119800 }, { "epoch": 2.1034428272968277, "grad_norm": 0.044097110603553175, "learning_rate": 0.00012895980042976603, "loss": 0.845, "step": 119810 }, { "epoch": 2.1036183921768288, "grad_norm": 0.05465333765803815, "learning_rate": 0.00012894911027508458, "loss": 0.8408, "step": 119820 }, { "epoch": 2.1037939570568303, "grad_norm": 0.04826868810991105, "learning_rate": 0.00012893841976628252, "loss": 0.8507, "step": 119830 }, { "epoch": 2.1039695219368317, "grad_norm": 0.051415061001360486, "learning_rate": 0.00012892772890349532, "loss": 0.8449, "step": 119840 }, { "epoch": 2.104145086816833, "grad_norm": 0.06598451844707298, "learning_rate": 0.00012891703768685842, "loss": 0.8549, "step": 119850 }, { "epoch": 2.1043206516968347, "grad_norm": 0.05332056004641161, "learning_rate": 0.00012890634611650736, "loss": 0.8459, "step": 119860 }, { "epoch": 2.104496216576836, "grad_norm": 0.06793736112583504, "learning_rate": 0.0001288956541925775, "loss": 0.8498, "step": 119870 }, { "epoch": 2.104671781456837, "grad_norm": 0.06387928480327408, "learning_rate": 0.00012888496191520437, "loss": 0.8435, "step": 119880 }, { "epoch": 2.1048473463368387, "grad_norm": 0.056989975833406994, "learning_rate": 0.0001288742692845234, "loss": 0.8503, "step": 119890 }, { "epoch": 2.10502291121684, "grad_norm": 0.059923914731861906, "learning_rate": 0.00012886357630067017, "loss": 0.8463, "step": 119900 }, { "epoch": 2.1051984760968416, "grad_norm": 0.08033871531544783, "learning_rate": 0.00012885288296378005, "loss": 0.8515, "step": 119910 }, { "epoch": 2.105374040976843, "grad_norm": 0.04665483280237207, "learning_rate": 0.00012884218927398854, "loss": 0.837, "step": 119920 }, { "epoch": 2.1055496058568446, "grad_norm": 0.05424304557822585, "learning_rate": 0.00012883149523143115, "loss": 0.8501, "step": 119930 }, { "epoch": 2.1057251707368456, "grad_norm": 0.06732183700281143, "learning_rate": 0.0001288208008362434, "loss": 0.8426, "step": 119940 }, { "epoch": 2.105900735616847, "grad_norm": 0.0719062333884648, "learning_rate": 0.00012881010608856082, "loss": 0.846, "step": 119950 }, { "epoch": 2.1060763004968486, "grad_norm": 0.0493532925888085, "learning_rate": 0.0001287994109885188, "loss": 0.8404, "step": 119960 }, { "epoch": 2.10625186537685, "grad_norm": 0.047877687344783953, "learning_rate": 0.00012878871553625296, "loss": 0.8537, "step": 119970 }, { "epoch": 2.1064274302568515, "grad_norm": 0.05949690335462631, "learning_rate": 0.00012877801973189872, "loss": 0.8543, "step": 119980 }, { "epoch": 2.106602995136853, "grad_norm": 0.04639837670129455, "learning_rate": 0.0001287673235755917, "loss": 0.8395, "step": 119990 }, { "epoch": 2.106778560016854, "grad_norm": 0.07720085082431946, "learning_rate": 0.00012875662706746734, "loss": 0.851, "step": 120000 }, { "epoch": 2.1069541248968555, "grad_norm": 0.11848386472478743, "learning_rate": 0.00012874593020766118, "loss": 0.8472, "step": 120010 }, { "epoch": 2.107129689776857, "grad_norm": 0.0775428983436027, "learning_rate": 0.00012873523299630882, "loss": 0.8474, "step": 120020 }, { "epoch": 2.1073052546568585, "grad_norm": 0.04961760284948472, "learning_rate": 0.00012872453543354568, "loss": 0.8453, "step": 120030 }, { "epoch": 2.10748081953686, "grad_norm": 0.0697490045795182, "learning_rate": 0.00012871383751950739, "loss": 0.8477, "step": 120040 }, { "epoch": 2.1076563844168614, "grad_norm": 0.05116042415217239, "learning_rate": 0.00012870313925432945, "loss": 0.8452, "step": 120050 }, { "epoch": 2.1078319492968625, "grad_norm": 0.07631964339491805, "learning_rate": 0.00012869244063814744, "loss": 0.846, "step": 120060 }, { "epoch": 2.108007514176864, "grad_norm": 0.09231692875797248, "learning_rate": 0.00012868174167109694, "loss": 0.8512, "step": 120070 }, { "epoch": 2.1081830790568654, "grad_norm": 0.06201152392546709, "learning_rate": 0.00012867104235331342, "loss": 0.8348, "step": 120080 }, { "epoch": 2.108358643936867, "grad_norm": 0.05149266023143844, "learning_rate": 0.00012866034268493252, "loss": 0.8436, "step": 120090 }, { "epoch": 2.1085342088168684, "grad_norm": 0.05492878981827273, "learning_rate": 0.00012864964266608972, "loss": 0.854, "step": 120100 }, { "epoch": 2.10870977369687, "grad_norm": 0.0652201064531521, "learning_rate": 0.0001286389422969207, "loss": 0.8479, "step": 120110 }, { "epoch": 2.108885338576871, "grad_norm": 0.05323424349719322, "learning_rate": 0.00012862824157756098, "loss": 0.8421, "step": 120120 }, { "epoch": 2.1090609034568724, "grad_norm": 0.055684206972616855, "learning_rate": 0.0001286175405081461, "loss": 0.8401, "step": 120130 }, { "epoch": 2.109236468336874, "grad_norm": 0.06254448674136937, "learning_rate": 0.00012860683908881175, "loss": 0.8505, "step": 120140 }, { "epoch": 2.1094120332168753, "grad_norm": 0.05958595786606714, "learning_rate": 0.00012859613731969343, "loss": 0.8438, "step": 120150 }, { "epoch": 2.109587598096877, "grad_norm": 0.05400044598537587, "learning_rate": 0.00012858543520092677, "loss": 0.8482, "step": 120160 }, { "epoch": 2.1097631629768783, "grad_norm": 0.06617817637436536, "learning_rate": 0.00012857473273264735, "loss": 0.842, "step": 120170 }, { "epoch": 2.1099387278568793, "grad_norm": 0.06127411261332291, "learning_rate": 0.00012856402991499082, "loss": 0.8437, "step": 120180 }, { "epoch": 2.110114292736881, "grad_norm": 0.05767425611677834, "learning_rate": 0.0001285533267480927, "loss": 0.8487, "step": 120190 }, { "epoch": 2.1102898576168823, "grad_norm": 0.0804391859803979, "learning_rate": 0.00012854262323208862, "loss": 0.847, "step": 120200 }, { "epoch": 2.1104654224968837, "grad_norm": 0.055995745240848084, "learning_rate": 0.00012853191936711432, "loss": 0.8496, "step": 120210 }, { "epoch": 2.110640987376885, "grad_norm": 0.06374635922692969, "learning_rate": 0.00012852121515330523, "loss": 0.8481, "step": 120220 }, { "epoch": 2.1108165522568867, "grad_norm": 0.05526077787021195, "learning_rate": 0.0001285105105907971, "loss": 0.845, "step": 120230 }, { "epoch": 2.1109921171368877, "grad_norm": 0.05283924750285396, "learning_rate": 0.0001284998056797256, "loss": 0.8514, "step": 120240 }, { "epoch": 2.111167682016889, "grad_norm": 0.06821721565522322, "learning_rate": 0.0001284891004202262, "loss": 0.8413, "step": 120250 }, { "epoch": 2.1113432468968907, "grad_norm": 0.05323742140707625, "learning_rate": 0.00012847839481243466, "loss": 0.8459, "step": 120260 }, { "epoch": 2.111518811776892, "grad_norm": 0.047503945701279894, "learning_rate": 0.0001284676888564866, "loss": 0.8392, "step": 120270 }, { "epoch": 2.1116943766568936, "grad_norm": 0.05091347852080487, "learning_rate": 0.00012845698255251764, "loss": 0.8511, "step": 120280 }, { "epoch": 2.111869941536895, "grad_norm": 0.0514573703576317, "learning_rate": 0.00012844627590066348, "loss": 0.8499, "step": 120290 }, { "epoch": 2.112045506416896, "grad_norm": 0.06635009141854468, "learning_rate": 0.0001284355689010597, "loss": 0.8435, "step": 120300 }, { "epoch": 2.1122210712968976, "grad_norm": 0.04800060241562691, "learning_rate": 0.00012842486155384205, "loss": 0.8448, "step": 120310 }, { "epoch": 2.112396636176899, "grad_norm": 0.0499585480168948, "learning_rate": 0.00012841415385914613, "loss": 0.8498, "step": 120320 }, { "epoch": 2.1125722010569006, "grad_norm": 0.05559164721219993, "learning_rate": 0.00012840344581710765, "loss": 0.8532, "step": 120330 }, { "epoch": 2.112747765936902, "grad_norm": 0.044874555252453374, "learning_rate": 0.0001283927374278622, "loss": 0.8531, "step": 120340 }, { "epoch": 2.1129233308169035, "grad_norm": 0.06833185160794958, "learning_rate": 0.00012838202869154558, "loss": 0.8507, "step": 120350 }, { "epoch": 2.113098895696905, "grad_norm": 0.05370908769795035, "learning_rate": 0.00012837131960829337, "loss": 0.8461, "step": 120360 }, { "epoch": 2.113274460576906, "grad_norm": 0.05614456972074223, "learning_rate": 0.0001283606101782413, "loss": 0.851, "step": 120370 }, { "epoch": 2.1134500254569075, "grad_norm": 0.053994180132530795, "learning_rate": 0.00012834990040152503, "loss": 0.8455, "step": 120380 }, { "epoch": 2.113625590336909, "grad_norm": 0.05872084053861498, "learning_rate": 0.00012833919027828032, "loss": 0.8454, "step": 120390 }, { "epoch": 2.1138011552169105, "grad_norm": 0.05150998789942413, "learning_rate": 0.0001283284798086428, "loss": 0.8418, "step": 120400 }, { "epoch": 2.113976720096912, "grad_norm": 0.05712309312530378, "learning_rate": 0.00012831776899274825, "loss": 0.84, "step": 120410 }, { "epoch": 2.1141522849769134, "grad_norm": 0.059926695723715825, "learning_rate": 0.00012830705783073229, "loss": 0.849, "step": 120420 }, { "epoch": 2.1143278498569145, "grad_norm": 0.04469794571336374, "learning_rate": 0.00012829634632273069, "loss": 0.8486, "step": 120430 }, { "epoch": 2.114503414736916, "grad_norm": 0.06053941460389534, "learning_rate": 0.00012828563446887916, "loss": 0.8421, "step": 120440 }, { "epoch": 2.1146789796169174, "grad_norm": 0.0680898910302209, "learning_rate": 0.00012827492226931338, "loss": 0.8396, "step": 120450 }, { "epoch": 2.114854544496919, "grad_norm": 0.0484127206714241, "learning_rate": 0.00012826420972416913, "loss": 0.8385, "step": 120460 }, { "epoch": 2.1150301093769204, "grad_norm": 0.04654008738403729, "learning_rate": 0.00012825349683358214, "loss": 0.8426, "step": 120470 }, { "epoch": 2.115205674256922, "grad_norm": 0.05733258067605185, "learning_rate": 0.00012824278359768808, "loss": 0.8467, "step": 120480 }, { "epoch": 2.115381239136923, "grad_norm": 0.0533479426204141, "learning_rate": 0.00012823207001662275, "loss": 0.8445, "step": 120490 }, { "epoch": 2.1155568040169244, "grad_norm": 0.092325818912977, "learning_rate": 0.00012822135609052187, "loss": 0.8389, "step": 120500 }, { "epoch": 2.115732368896926, "grad_norm": 0.07534653691018381, "learning_rate": 0.0001282106418195212, "loss": 0.8357, "step": 120510 }, { "epoch": 2.1159079337769273, "grad_norm": 0.04871616304876778, "learning_rate": 0.0001281999272037565, "loss": 0.8448, "step": 120520 }, { "epoch": 2.116083498656929, "grad_norm": 0.04837426104389736, "learning_rate": 0.00012818921224336347, "loss": 0.8455, "step": 120530 }, { "epoch": 2.1162590635369303, "grad_norm": 0.05471868937951037, "learning_rate": 0.00012817849693847792, "loss": 0.8469, "step": 120540 }, { "epoch": 2.1164346284169313, "grad_norm": 0.05326301528890997, "learning_rate": 0.0001281677812892356, "loss": 0.8485, "step": 120550 }, { "epoch": 2.116610193296933, "grad_norm": 0.04789783484648204, "learning_rate": 0.00012815706529577232, "loss": 0.8487, "step": 120560 }, { "epoch": 2.1167857581769343, "grad_norm": 0.04925082366714438, "learning_rate": 0.00012814634895822377, "loss": 0.8422, "step": 120570 }, { "epoch": 2.1169613230569357, "grad_norm": 0.04421277663818635, "learning_rate": 0.00012813563227672585, "loss": 0.8515, "step": 120580 }, { "epoch": 2.117136887936937, "grad_norm": 0.05002867921670386, "learning_rate": 0.0001281249152514142, "loss": 0.844, "step": 120590 }, { "epoch": 2.1173124528169387, "grad_norm": 0.0498596924182204, "learning_rate": 0.0001281141978824247, "loss": 0.8442, "step": 120600 }, { "epoch": 2.1174880176969397, "grad_norm": 0.0613022116747638, "learning_rate": 0.0001281034801698931, "loss": 0.8418, "step": 120610 }, { "epoch": 2.117663582576941, "grad_norm": 0.0557323827904395, "learning_rate": 0.0001280927621139552, "loss": 0.835, "step": 120620 }, { "epoch": 2.1178391474569427, "grad_norm": 0.052802398628669076, "learning_rate": 0.00012808204371474687, "loss": 0.8392, "step": 120630 }, { "epoch": 2.118014712336944, "grad_norm": 0.05345063534616969, "learning_rate": 0.0001280713249724038, "loss": 0.8482, "step": 120640 }, { "epoch": 2.1181902772169456, "grad_norm": 0.06146079217569216, "learning_rate": 0.00012806060588706187, "loss": 0.8462, "step": 120650 }, { "epoch": 2.118365842096947, "grad_norm": 0.08142762403119296, "learning_rate": 0.00012804988645885687, "loss": 0.8375, "step": 120660 }, { "epoch": 2.118541406976948, "grad_norm": 0.04707940276618875, "learning_rate": 0.00012803916668792465, "loss": 0.8433, "step": 120670 }, { "epoch": 2.1187169718569496, "grad_norm": 0.06834024825386517, "learning_rate": 0.000128028446574401, "loss": 0.8418, "step": 120680 }, { "epoch": 2.118892536736951, "grad_norm": 0.06456022619399358, "learning_rate": 0.00012801772611842173, "loss": 0.8524, "step": 120690 }, { "epoch": 2.1190681016169526, "grad_norm": 0.07876484514183339, "learning_rate": 0.00012800700532012273, "loss": 0.8498, "step": 120700 }, { "epoch": 2.119243666496954, "grad_norm": 0.05002167554150724, "learning_rate": 0.00012799628417963977, "loss": 0.8397, "step": 120710 }, { "epoch": 2.1194192313769555, "grad_norm": 0.05972542470910255, "learning_rate": 0.00012798556269710872, "loss": 0.8491, "step": 120720 }, { "epoch": 2.1195947962569566, "grad_norm": 0.059743684518182724, "learning_rate": 0.0001279748408726654, "loss": 0.8442, "step": 120730 }, { "epoch": 2.119770361136958, "grad_norm": 0.05346207544712484, "learning_rate": 0.00012796411870644568, "loss": 0.8458, "step": 120740 }, { "epoch": 2.1199459260169595, "grad_norm": 0.051467842497595354, "learning_rate": 0.00012795339619858543, "loss": 0.8449, "step": 120750 }, { "epoch": 2.120121490896961, "grad_norm": 0.05502775104849383, "learning_rate": 0.0001279426733492205, "loss": 0.8518, "step": 120760 }, { "epoch": 2.1202970557769625, "grad_norm": 0.05448512029548603, "learning_rate": 0.0001279319501584867, "loss": 0.8446, "step": 120770 }, { "epoch": 2.120472620656964, "grad_norm": 0.04496531958479392, "learning_rate": 0.00012792122662651993, "loss": 0.8441, "step": 120780 }, { "epoch": 2.120648185536965, "grad_norm": 0.0520769890161646, "learning_rate": 0.00012791050275345607, "loss": 0.8468, "step": 120790 }, { "epoch": 2.1208237504169665, "grad_norm": 0.04185506043679376, "learning_rate": 0.000127899778539431, "loss": 0.8493, "step": 120800 }, { "epoch": 2.120999315296968, "grad_norm": 0.06324232729460372, "learning_rate": 0.00012788905398458054, "loss": 0.8475, "step": 120810 }, { "epoch": 2.1211748801769694, "grad_norm": 0.04787170721276395, "learning_rate": 0.00012787832908904068, "loss": 0.8431, "step": 120820 }, { "epoch": 2.121350445056971, "grad_norm": 0.05809126716204946, "learning_rate": 0.00012786760385294715, "loss": 0.841, "step": 120830 }, { "epoch": 2.1215260099369724, "grad_norm": 0.05847671316649162, "learning_rate": 0.000127856878276436, "loss": 0.845, "step": 120840 }, { "epoch": 2.1217015748169734, "grad_norm": 0.045302080482299535, "learning_rate": 0.00012784615235964303, "loss": 0.8483, "step": 120850 }, { "epoch": 2.121877139696975, "grad_norm": 0.04410287494310764, "learning_rate": 0.00012783542610270417, "loss": 0.8441, "step": 120860 }, { "epoch": 2.1220527045769764, "grad_norm": 0.06286640068732423, "learning_rate": 0.00012782469950575534, "loss": 0.8425, "step": 120870 }, { "epoch": 2.122228269456978, "grad_norm": 0.06106314189582124, "learning_rate": 0.00012781397256893238, "loss": 0.8485, "step": 120880 }, { "epoch": 2.1224038343369793, "grad_norm": 0.05386118987079417, "learning_rate": 0.00012780324529237128, "loss": 0.8543, "step": 120890 }, { "epoch": 2.122579399216981, "grad_norm": 0.06865444486780099, "learning_rate": 0.0001277925176762079, "loss": 0.8439, "step": 120900 }, { "epoch": 2.122754964096982, "grad_norm": 0.04916098393037035, "learning_rate": 0.00012778178972057817, "loss": 0.8411, "step": 120910 }, { "epoch": 2.1229305289769833, "grad_norm": 0.08748638737033435, "learning_rate": 0.0001277710614256181, "loss": 0.8431, "step": 120920 }, { "epoch": 2.123106093856985, "grad_norm": 0.07121657338992499, "learning_rate": 0.0001277603327914635, "loss": 0.843, "step": 120930 }, { "epoch": 2.1232816587369863, "grad_norm": 0.056237356547752505, "learning_rate": 0.0001277496038182504, "loss": 0.8438, "step": 120940 }, { "epoch": 2.1234572236169877, "grad_norm": 0.05143683552673948, "learning_rate": 0.00012773887450611465, "loss": 0.8521, "step": 120950 }, { "epoch": 2.123632788496989, "grad_norm": 0.058638204337942035, "learning_rate": 0.00012772814485519223, "loss": 0.8463, "step": 120960 }, { "epoch": 2.1238083533769903, "grad_norm": 0.044994579788062386, "learning_rate": 0.0001277174148656191, "loss": 0.8415, "step": 120970 }, { "epoch": 2.1239839182569917, "grad_norm": 0.04931286165918005, "learning_rate": 0.00012770668453753123, "loss": 0.8516, "step": 120980 }, { "epoch": 2.124159483136993, "grad_norm": 0.06059540905291427, "learning_rate": 0.00012769595387106454, "loss": 0.8443, "step": 120990 }, { "epoch": 2.1243350480169947, "grad_norm": 0.04989914196665394, "learning_rate": 0.00012768522286635498, "loss": 0.8407, "step": 121000 }, { "epoch": 2.124510612896996, "grad_norm": 0.05699358562273494, "learning_rate": 0.00012767449152353857, "loss": 0.85, "step": 121010 }, { "epoch": 2.1246861777769976, "grad_norm": 0.07154571271018864, "learning_rate": 0.0001276637598427512, "loss": 0.8563, "step": 121020 }, { "epoch": 2.124861742656999, "grad_norm": 0.05102595240415249, "learning_rate": 0.00012765302782412888, "loss": 0.8481, "step": 121030 }, { "epoch": 2.125037307537, "grad_norm": 0.05911004215512329, "learning_rate": 0.00012764229546780763, "loss": 0.8433, "step": 121040 }, { "epoch": 2.1252128724170016, "grad_norm": 0.043139953510372905, "learning_rate": 0.00012763156277392334, "loss": 0.8411, "step": 121050 }, { "epoch": 2.125388437297003, "grad_norm": 0.04874995424381429, "learning_rate": 0.00012762082974261208, "loss": 0.8406, "step": 121060 }, { "epoch": 2.1255640021770046, "grad_norm": 0.05894145788657715, "learning_rate": 0.00012761009637400977, "loss": 0.8479, "step": 121070 }, { "epoch": 2.125739567057006, "grad_norm": 0.07746761075596896, "learning_rate": 0.00012759936266825247, "loss": 0.8506, "step": 121080 }, { "epoch": 2.125915131937007, "grad_norm": 0.08277006502975455, "learning_rate": 0.00012758862862547618, "loss": 0.8449, "step": 121090 }, { "epoch": 2.1260906968170086, "grad_norm": 0.05992735167901212, "learning_rate": 0.0001275778942458168, "loss": 0.8494, "step": 121100 }, { "epoch": 2.12626626169701, "grad_norm": 0.0571665284827362, "learning_rate": 0.00012756715952941047, "loss": 0.8452, "step": 121110 }, { "epoch": 2.1264418265770115, "grad_norm": 0.056317448253266726, "learning_rate": 0.00012755642447639308, "loss": 0.8439, "step": 121120 }, { "epoch": 2.126617391457013, "grad_norm": 0.06003236902337512, "learning_rate": 0.00012754568908690075, "loss": 0.85, "step": 121130 }, { "epoch": 2.1267929563370145, "grad_norm": 0.047557460797166724, "learning_rate": 0.0001275349533610694, "loss": 0.8446, "step": 121140 }, { "epoch": 2.126968521217016, "grad_norm": 0.054727725923606696, "learning_rate": 0.00012752421729903514, "loss": 0.8546, "step": 121150 }, { "epoch": 2.127144086097017, "grad_norm": 0.05374675776467751, "learning_rate": 0.00012751348090093398, "loss": 0.847, "step": 121160 }, { "epoch": 2.1273196509770185, "grad_norm": 0.06087439283356885, "learning_rate": 0.0001275027441669019, "loss": 0.8403, "step": 121170 }, { "epoch": 2.12749521585702, "grad_norm": 0.057487291774440216, "learning_rate": 0.00012749200709707498, "loss": 0.8474, "step": 121180 }, { "epoch": 2.1276707807370214, "grad_norm": 0.04907112720130079, "learning_rate": 0.00012748126969158925, "loss": 0.8411, "step": 121190 }, { "epoch": 2.127846345617023, "grad_norm": 0.06567519968936898, "learning_rate": 0.00012747053195058078, "loss": 0.8447, "step": 121200 }, { "epoch": 2.1280219104970244, "grad_norm": 0.0720864372133087, "learning_rate": 0.00012745979387418561, "loss": 0.8409, "step": 121210 }, { "epoch": 2.1281974753770254, "grad_norm": 0.053132547949080414, "learning_rate": 0.00012744905546253975, "loss": 0.8417, "step": 121220 }, { "epoch": 2.128373040257027, "grad_norm": 0.06458251270445044, "learning_rate": 0.0001274383167157793, "loss": 0.8468, "step": 121230 }, { "epoch": 2.1285486051370284, "grad_norm": 0.0636278241897984, "learning_rate": 0.00012742757763404026, "loss": 0.8487, "step": 121240 }, { "epoch": 2.12872417001703, "grad_norm": 0.052576106112149294, "learning_rate": 0.0001274168382174588, "loss": 0.8556, "step": 121250 }, { "epoch": 2.1288997348970313, "grad_norm": 0.06937656808786613, "learning_rate": 0.00012740609846617092, "loss": 0.8466, "step": 121260 }, { "epoch": 2.129075299777033, "grad_norm": 0.060072200441324566, "learning_rate": 0.00012739535838031274, "loss": 0.84, "step": 121270 }, { "epoch": 2.129250864657034, "grad_norm": 0.08293624581075938, "learning_rate": 0.0001273846179600203, "loss": 0.8429, "step": 121280 }, { "epoch": 2.1294264295370353, "grad_norm": 0.09017318650155191, "learning_rate": 0.00012737387720542965, "loss": 0.8427, "step": 121290 }, { "epoch": 2.129601994417037, "grad_norm": 0.06565781018814018, "learning_rate": 0.00012736313611667698, "loss": 0.8462, "step": 121300 }, { "epoch": 2.1297775592970383, "grad_norm": 0.053488956328955124, "learning_rate": 0.0001273523946938983, "loss": 0.8461, "step": 121310 }, { "epoch": 2.1299531241770397, "grad_norm": 0.058931913459728265, "learning_rate": 0.00012734165293722973, "loss": 0.8427, "step": 121320 }, { "epoch": 2.1301286890570412, "grad_norm": 0.06224932675176705, "learning_rate": 0.00012733091084680736, "loss": 0.8418, "step": 121330 }, { "epoch": 2.1303042539370423, "grad_norm": 0.04536557503075575, "learning_rate": 0.00012732016842276732, "loss": 0.8414, "step": 121340 }, { "epoch": 2.1304798188170437, "grad_norm": 0.04895075545377758, "learning_rate": 0.00012730942566524568, "loss": 0.8397, "step": 121350 }, { "epoch": 2.130655383697045, "grad_norm": 0.09879464323539412, "learning_rate": 0.00012729868257437858, "loss": 0.8472, "step": 121360 }, { "epoch": 2.1308309485770467, "grad_norm": 0.05833389434955276, "learning_rate": 0.00012728793915030214, "loss": 0.8458, "step": 121370 }, { "epoch": 2.131006513457048, "grad_norm": 0.048872779012008545, "learning_rate": 0.00012727719539315247, "loss": 0.8426, "step": 121380 }, { "epoch": 2.1311820783370496, "grad_norm": 0.05700759308312006, "learning_rate": 0.00012726645130306568, "loss": 0.8482, "step": 121390 }, { "epoch": 2.1313576432170507, "grad_norm": 0.06892270936933463, "learning_rate": 0.00012725570688017795, "loss": 0.8469, "step": 121400 }, { "epoch": 2.131533208097052, "grad_norm": 0.07467425315040163, "learning_rate": 0.00012724496212462535, "loss": 0.8444, "step": 121410 }, { "epoch": 2.1317087729770536, "grad_norm": 0.05680642434103941, "learning_rate": 0.0001272342170365441, "loss": 0.8444, "step": 121420 }, { "epoch": 2.131884337857055, "grad_norm": 0.08797298692466085, "learning_rate": 0.00012722347161607024, "loss": 0.8448, "step": 121430 }, { "epoch": 2.1320599027370566, "grad_norm": 0.06718051286637969, "learning_rate": 0.00012721272586334, "loss": 0.8496, "step": 121440 }, { "epoch": 2.132235467617058, "grad_norm": 0.06524732592239288, "learning_rate": 0.00012720197977848948, "loss": 0.8445, "step": 121450 }, { "epoch": 2.132411032497059, "grad_norm": 0.056767385445960605, "learning_rate": 0.00012719123336165487, "loss": 0.8448, "step": 121460 }, { "epoch": 2.1325865973770606, "grad_norm": 0.059069090892374436, "learning_rate": 0.00012718048661297231, "loss": 0.8472, "step": 121470 }, { "epoch": 2.132762162257062, "grad_norm": 0.07982275053857242, "learning_rate": 0.00012716973953257795, "loss": 0.8475, "step": 121480 }, { "epoch": 2.1329377271370635, "grad_norm": 0.053828731364708046, "learning_rate": 0.000127158992120608, "loss": 0.8538, "step": 121490 }, { "epoch": 2.133113292017065, "grad_norm": 0.053085481375353245, "learning_rate": 0.00012714824437719862, "loss": 0.8452, "step": 121500 }, { "epoch": 2.1332888568970665, "grad_norm": 0.05769245641347034, "learning_rate": 0.00012713749630248594, "loss": 0.849, "step": 121510 }, { "epoch": 2.1334644217770675, "grad_norm": 0.0628167361560404, "learning_rate": 0.00012712674789660617, "loss": 0.8477, "step": 121520 }, { "epoch": 2.133639986657069, "grad_norm": 0.051877009840877876, "learning_rate": 0.00012711599915969548, "loss": 0.8504, "step": 121530 }, { "epoch": 2.1338155515370705, "grad_norm": 0.0497730165284881, "learning_rate": 0.00012710525009189012, "loss": 0.8532, "step": 121540 }, { "epoch": 2.133991116417072, "grad_norm": 0.0750875159992833, "learning_rate": 0.00012709450069332625, "loss": 0.8394, "step": 121550 }, { "epoch": 2.1341666812970734, "grad_norm": 0.05030537431276347, "learning_rate": 0.00012708375096414, "loss": 0.8477, "step": 121560 }, { "epoch": 2.134342246177075, "grad_norm": 0.07515800141164694, "learning_rate": 0.00012707300090446764, "loss": 0.8404, "step": 121570 }, { "epoch": 2.134517811057076, "grad_norm": 0.0624666954611381, "learning_rate": 0.0001270622505144454, "loss": 0.8472, "step": 121580 }, { "epoch": 2.1346933759370774, "grad_norm": 0.06288740846719176, "learning_rate": 0.0001270514997942094, "loss": 0.8417, "step": 121590 }, { "epoch": 2.134868940817079, "grad_norm": 0.05040645760598401, "learning_rate": 0.00012704074874389596, "loss": 0.8455, "step": 121600 }, { "epoch": 2.1350445056970804, "grad_norm": 0.0532483718112495, "learning_rate": 0.0001270299973636412, "loss": 0.8379, "step": 121610 }, { "epoch": 2.135220070577082, "grad_norm": 0.052868114313295855, "learning_rate": 0.0001270192456535814, "loss": 0.85, "step": 121620 }, { "epoch": 2.1353956354570833, "grad_norm": 0.0510002351359912, "learning_rate": 0.00012700849361385277, "loss": 0.8456, "step": 121630 }, { "epoch": 2.1355712003370844, "grad_norm": 0.06452661195290794, "learning_rate": 0.00012699774124459156, "loss": 0.8512, "step": 121640 }, { "epoch": 2.135746765217086, "grad_norm": 0.04744673713645255, "learning_rate": 0.00012698698854593393, "loss": 0.844, "step": 121650 }, { "epoch": 2.1359223300970873, "grad_norm": 0.04913355677577753, "learning_rate": 0.00012697623551801622, "loss": 0.8366, "step": 121660 }, { "epoch": 2.136097894977089, "grad_norm": 0.05339341361307408, "learning_rate": 0.00012696548216097465, "loss": 0.8457, "step": 121670 }, { "epoch": 2.1362734598570903, "grad_norm": 0.05983972237817765, "learning_rate": 0.00012695472847494538, "loss": 0.8425, "step": 121680 }, { "epoch": 2.1364490247370918, "grad_norm": 0.060022299891216144, "learning_rate": 0.00012694397446006477, "loss": 0.8415, "step": 121690 }, { "epoch": 2.1366245896170932, "grad_norm": 0.045921953824434064, "learning_rate": 0.00012693322011646902, "loss": 0.8547, "step": 121700 }, { "epoch": 2.1368001544970943, "grad_norm": 0.05623920617071648, "learning_rate": 0.0001269224654442944, "loss": 0.8479, "step": 121710 }, { "epoch": 2.1369757193770957, "grad_norm": 0.05420956876919341, "learning_rate": 0.0001269117104436772, "loss": 0.8475, "step": 121720 }, { "epoch": 2.137151284257097, "grad_norm": 0.041691592224262004, "learning_rate": 0.0001269009551147536, "loss": 0.8493, "step": 121730 }, { "epoch": 2.1373268491370987, "grad_norm": 0.062381535569252834, "learning_rate": 0.00012689019945766, "loss": 0.845, "step": 121740 }, { "epoch": 2.1375024140171, "grad_norm": 0.049057228841299545, "learning_rate": 0.00012687944347253258, "loss": 0.8501, "step": 121750 }, { "epoch": 2.137677978897101, "grad_norm": 0.045106639520723475, "learning_rate": 0.00012686868715950763, "loss": 0.8385, "step": 121760 }, { "epoch": 2.1378535437771027, "grad_norm": 0.05267462105795237, "learning_rate": 0.00012685793051872153, "loss": 0.8495, "step": 121770 }, { "epoch": 2.138029108657104, "grad_norm": 0.06173883210335855, "learning_rate": 0.0001268471735503104, "loss": 0.8436, "step": 121780 }, { "epoch": 2.1382046735371056, "grad_norm": 0.053920561240786535, "learning_rate": 0.0001268364162544107, "loss": 0.8448, "step": 121790 }, { "epoch": 2.138380238417107, "grad_norm": 0.05012129913394474, "learning_rate": 0.00012682565863115862, "loss": 0.8445, "step": 121800 }, { "epoch": 2.1385558032971086, "grad_norm": 0.07831914220490918, "learning_rate": 0.00012681490068069056, "loss": 0.843, "step": 121810 }, { "epoch": 2.13873136817711, "grad_norm": 0.05983074888379308, "learning_rate": 0.00012680414240314266, "loss": 0.8415, "step": 121820 }, { "epoch": 2.138906933057111, "grad_norm": 0.05662485089780951, "learning_rate": 0.0001267933837986514, "loss": 0.8506, "step": 121830 }, { "epoch": 2.1390824979371126, "grad_norm": 0.05127954270965801, "learning_rate": 0.000126782624867353, "loss": 0.8418, "step": 121840 }, { "epoch": 2.139258062817114, "grad_norm": 0.056898847858386586, "learning_rate": 0.00012677186560938378, "loss": 0.8467, "step": 121850 }, { "epoch": 2.1394336276971155, "grad_norm": 0.041614681709839216, "learning_rate": 0.00012676110602488015, "loss": 0.8565, "step": 121860 }, { "epoch": 2.139609192577117, "grad_norm": 0.05065493048824311, "learning_rate": 0.0001267503461139783, "loss": 0.8385, "step": 121870 }, { "epoch": 2.139784757457118, "grad_norm": 0.07479262508633547, "learning_rate": 0.00012673958587681465, "loss": 0.8446, "step": 121880 }, { "epoch": 2.1399603223371195, "grad_norm": 0.05895899819262954, "learning_rate": 0.00012672882531352553, "loss": 0.8545, "step": 121890 }, { "epoch": 2.140135887217121, "grad_norm": 0.06897900497619065, "learning_rate": 0.00012671806442424724, "loss": 0.8461, "step": 121900 }, { "epoch": 2.1403114520971225, "grad_norm": 0.05971589701391146, "learning_rate": 0.00012670730320911617, "loss": 0.8451, "step": 121910 }, { "epoch": 2.140487016977124, "grad_norm": 0.057832430433844825, "learning_rate": 0.0001266965416682686, "loss": 0.8505, "step": 121920 }, { "epoch": 2.1406625818571254, "grad_norm": 0.043374746657688965, "learning_rate": 0.00012668577980184096, "loss": 0.8423, "step": 121930 }, { "epoch": 2.140838146737127, "grad_norm": 0.044150327259708265, "learning_rate": 0.00012667501760996958, "loss": 0.8471, "step": 121940 }, { "epoch": 2.141013711617128, "grad_norm": 0.05668230100539992, "learning_rate": 0.0001266642550927908, "loss": 0.8422, "step": 121950 }, { "epoch": 2.1411892764971294, "grad_norm": 0.04781377722242677, "learning_rate": 0.00012665349225044097, "loss": 0.8516, "step": 121960 }, { "epoch": 2.141364841377131, "grad_norm": 0.08308324231755497, "learning_rate": 0.00012664272908305646, "loss": 0.8476, "step": 121970 }, { "epoch": 2.1415404062571324, "grad_norm": 0.05400825809611451, "learning_rate": 0.0001266319655907737, "loss": 0.8424, "step": 121980 }, { "epoch": 2.141715971137134, "grad_norm": 0.06515672195379303, "learning_rate": 0.00012662120177372898, "loss": 0.8432, "step": 121990 }, { "epoch": 2.1418915360171353, "grad_norm": 0.06425784240917502, "learning_rate": 0.00012661043763205872, "loss": 0.8447, "step": 122000 }, { "epoch": 2.1420671008971364, "grad_norm": 0.05637481893243189, "learning_rate": 0.00012659967316589938, "loss": 0.8426, "step": 122010 }, { "epoch": 2.142242665777138, "grad_norm": 0.06109153155541413, "learning_rate": 0.00012658890837538719, "loss": 0.848, "step": 122020 }, { "epoch": 2.1424182306571393, "grad_norm": 0.07007960119523468, "learning_rate": 0.0001265781432606587, "loss": 0.8463, "step": 122030 }, { "epoch": 2.142593795537141, "grad_norm": 0.050094164044648115, "learning_rate": 0.00012656737782185018, "loss": 0.8373, "step": 122040 }, { "epoch": 2.1427693604171423, "grad_norm": 0.04658511953362383, "learning_rate": 0.0001265566120590981, "loss": 0.8453, "step": 122050 }, { "epoch": 2.1429449252971438, "grad_norm": 0.0555397156679044, "learning_rate": 0.00012654584597253886, "loss": 0.8376, "step": 122060 }, { "epoch": 2.143120490177145, "grad_norm": 0.05183612206850376, "learning_rate": 0.0001265350795623088, "loss": 0.8454, "step": 122070 }, { "epoch": 2.1432960550571463, "grad_norm": 0.05991558779222775, "learning_rate": 0.00012652431282854446, "loss": 0.8484, "step": 122080 }, { "epoch": 2.1434716199371477, "grad_norm": 0.04330223921377113, "learning_rate": 0.00012651354577138213, "loss": 0.8542, "step": 122090 }, { "epoch": 2.143647184817149, "grad_norm": 0.08281452034897126, "learning_rate": 0.00012650277839095831, "loss": 0.8467, "step": 122100 }, { "epoch": 2.1438227496971507, "grad_norm": 0.05261789885832064, "learning_rate": 0.00012649201068740945, "loss": 0.8438, "step": 122110 }, { "epoch": 2.143998314577152, "grad_norm": 0.05286165582929605, "learning_rate": 0.00012648124266087185, "loss": 0.8436, "step": 122120 }, { "epoch": 2.144173879457153, "grad_norm": 0.045449611343035025, "learning_rate": 0.00012647047431148207, "loss": 0.8472, "step": 122130 }, { "epoch": 2.1443494443371547, "grad_norm": 0.05957479969236508, "learning_rate": 0.00012645970563937648, "loss": 0.8418, "step": 122140 }, { "epoch": 2.144525009217156, "grad_norm": 0.04566570898303258, "learning_rate": 0.00012644893664469156, "loss": 0.8434, "step": 122150 }, { "epoch": 2.1447005740971576, "grad_norm": 0.06234597847940553, "learning_rate": 0.00012643816732756368, "loss": 0.8434, "step": 122160 }, { "epoch": 2.144876138977159, "grad_norm": 0.05077508017621042, "learning_rate": 0.00012642739768812942, "loss": 0.8505, "step": 122170 }, { "epoch": 2.1450517038571606, "grad_norm": 0.06115892797971896, "learning_rate": 0.00012641662772652513, "loss": 0.8427, "step": 122180 }, { "epoch": 2.1452272687371616, "grad_norm": 0.06418232687655422, "learning_rate": 0.00012640585744288723, "loss": 0.8441, "step": 122190 }, { "epoch": 2.145402833617163, "grad_norm": 0.056253494741729915, "learning_rate": 0.00012639508683735232, "loss": 0.8421, "step": 122200 }, { "epoch": 2.1455783984971646, "grad_norm": 0.052564378731507495, "learning_rate": 0.00012638431591005677, "loss": 0.8461, "step": 122210 }, { "epoch": 2.145753963377166, "grad_norm": 0.057955258975678575, "learning_rate": 0.0001263735446611371, "loss": 0.847, "step": 122220 }, { "epoch": 2.1459295282571675, "grad_norm": 0.057378961528014534, "learning_rate": 0.00012636277309072976, "loss": 0.8489, "step": 122230 }, { "epoch": 2.146105093137169, "grad_norm": 0.05269813119111132, "learning_rate": 0.00012635200119897117, "loss": 0.8465, "step": 122240 }, { "epoch": 2.14628065801717, "grad_norm": 0.054666038594552775, "learning_rate": 0.0001263412289859979, "loss": 0.8406, "step": 122250 }, { "epoch": 2.1464562228971715, "grad_norm": 0.05511157330298314, "learning_rate": 0.00012633045645194638, "loss": 0.851, "step": 122260 }, { "epoch": 2.146631787777173, "grad_norm": 0.055120575672913875, "learning_rate": 0.0001263196835969531, "loss": 0.8509, "step": 122270 }, { "epoch": 2.1468073526571745, "grad_norm": 0.048713549072839105, "learning_rate": 0.0001263089104211547, "loss": 0.8489, "step": 122280 }, { "epoch": 2.146982917537176, "grad_norm": 0.06139539222663221, "learning_rate": 0.00012629813692468744, "loss": 0.8469, "step": 122290 }, { "epoch": 2.1471584824171774, "grad_norm": 0.04720071451965171, "learning_rate": 0.00012628736310768795, "loss": 0.84, "step": 122300 }, { "epoch": 2.1473340472971785, "grad_norm": 0.059677953320660726, "learning_rate": 0.00012627658897029277, "loss": 0.8494, "step": 122310 }, { "epoch": 2.14750961217718, "grad_norm": 0.05094274065752045, "learning_rate": 0.00012626581451263832, "loss": 0.8459, "step": 122320 }, { "epoch": 2.1476851770571814, "grad_norm": 0.056672922249999376, "learning_rate": 0.00012625503973486116, "loss": 0.8467, "step": 122330 }, { "epoch": 2.147860741937183, "grad_norm": 0.052459868051554524, "learning_rate": 0.00012624426463709782, "loss": 0.8394, "step": 122340 }, { "epoch": 2.1480363068171844, "grad_norm": 0.08510818871732356, "learning_rate": 0.00012623348921948484, "loss": 0.844, "step": 122350 }, { "epoch": 2.148211871697186, "grad_norm": 0.05583045469835623, "learning_rate": 0.00012622271348215866, "loss": 0.844, "step": 122360 }, { "epoch": 2.148387436577187, "grad_norm": 0.04652216579151652, "learning_rate": 0.0001262119374252559, "loss": 0.8489, "step": 122370 }, { "epoch": 2.1485630014571884, "grad_norm": 0.059556016936233934, "learning_rate": 0.00012620116104891305, "loss": 0.8479, "step": 122380 }, { "epoch": 2.14873856633719, "grad_norm": 0.04518820831933173, "learning_rate": 0.00012619038435326664, "loss": 0.8478, "step": 122390 }, { "epoch": 2.1489141312171913, "grad_norm": 0.08706766072593099, "learning_rate": 0.00012617960733845326, "loss": 0.8463, "step": 122400 }, { "epoch": 2.149089696097193, "grad_norm": 0.04382815815947329, "learning_rate": 0.0001261688300046094, "loss": 0.8426, "step": 122410 }, { "epoch": 2.1492652609771943, "grad_norm": 0.052601616389815, "learning_rate": 0.0001261580523518717, "loss": 0.8447, "step": 122420 }, { "epoch": 2.1494408258571953, "grad_norm": 0.09976124812563328, "learning_rate": 0.00012614727438037658, "loss": 0.8462, "step": 122430 }, { "epoch": 2.149616390737197, "grad_norm": 0.09997674844236455, "learning_rate": 0.00012613649609026072, "loss": 0.8447, "step": 122440 }, { "epoch": 2.1497919556171983, "grad_norm": 0.05008535556970504, "learning_rate": 0.00012612571748166066, "loss": 0.8509, "step": 122450 }, { "epoch": 2.1499675204971997, "grad_norm": 0.05406101587645634, "learning_rate": 0.0001261149385547129, "loss": 0.8471, "step": 122460 }, { "epoch": 2.1501430853772012, "grad_norm": 0.06892209918433047, "learning_rate": 0.00012610415930955406, "loss": 0.8468, "step": 122470 }, { "epoch": 2.1503186502572027, "grad_norm": 0.05942190181735698, "learning_rate": 0.0001260933797463207, "loss": 0.8434, "step": 122480 }, { "epoch": 2.150494215137204, "grad_norm": 0.06358348531901072, "learning_rate": 0.00012608259986514946, "loss": 0.8442, "step": 122490 }, { "epoch": 2.150669780017205, "grad_norm": 0.05656266479823076, "learning_rate": 0.00012607181966617682, "loss": 0.8444, "step": 122500 }, { "epoch": 2.1508453448972067, "grad_norm": 0.05708493733728223, "learning_rate": 0.00012606103914953947, "loss": 0.846, "step": 122510 }, { "epoch": 2.151020909777208, "grad_norm": 0.05812347736233087, "learning_rate": 0.00012605025831537392, "loss": 0.8453, "step": 122520 }, { "epoch": 2.1511964746572096, "grad_norm": 0.05197321013549247, "learning_rate": 0.00012603947716381678, "loss": 0.8506, "step": 122530 }, { "epoch": 2.151372039537211, "grad_norm": 0.04817383420698873, "learning_rate": 0.00012602869569500468, "loss": 0.8465, "step": 122540 }, { "epoch": 2.151547604417212, "grad_norm": 0.05916210859272396, "learning_rate": 0.0001260179139090742, "loss": 0.837, "step": 122550 }, { "epoch": 2.1517231692972136, "grad_norm": 0.06343025198297428, "learning_rate": 0.00012600713180616196, "loss": 0.8441, "step": 122560 }, { "epoch": 2.151898734177215, "grad_norm": 0.04953946538727044, "learning_rate": 0.00012599634938640462, "loss": 0.845, "step": 122570 }, { "epoch": 2.1520742990572166, "grad_norm": 0.04621058091292335, "learning_rate": 0.00012598556664993865, "loss": 0.8392, "step": 122580 }, { "epoch": 2.152249863937218, "grad_norm": 0.07060981525282273, "learning_rate": 0.00012597478359690082, "loss": 0.8462, "step": 122590 }, { "epoch": 2.1524254288172195, "grad_norm": 0.053263150881923176, "learning_rate": 0.0001259640002274277, "loss": 0.8407, "step": 122600 }, { "epoch": 2.152600993697221, "grad_norm": 0.0705206887480015, "learning_rate": 0.00012595321654165585, "loss": 0.8426, "step": 122610 }, { "epoch": 2.152776558577222, "grad_norm": 0.05640936225135474, "learning_rate": 0.00012594243253972207, "loss": 0.8369, "step": 122620 }, { "epoch": 2.1529521234572235, "grad_norm": 0.09945043045481991, "learning_rate": 0.00012593164822176283, "loss": 0.8494, "step": 122630 }, { "epoch": 2.153127688337225, "grad_norm": 0.05878893658114796, "learning_rate": 0.00012592086358791482, "loss": 0.8406, "step": 122640 }, { "epoch": 2.1533032532172265, "grad_norm": 0.04678317035051498, "learning_rate": 0.0001259100786383147, "loss": 0.8418, "step": 122650 }, { "epoch": 2.153478818097228, "grad_norm": 0.0459123683897107, "learning_rate": 0.00012589929337309912, "loss": 0.8519, "step": 122660 }, { "epoch": 2.1536543829772294, "grad_norm": 0.049907685630294595, "learning_rate": 0.0001258885077924047, "loss": 0.8484, "step": 122670 }, { "epoch": 2.1538299478572305, "grad_norm": 0.050935928531704984, "learning_rate": 0.00012587772189636814, "loss": 0.8487, "step": 122680 }, { "epoch": 2.154005512737232, "grad_norm": 0.07696801893654553, "learning_rate": 0.0001258669356851261, "loss": 0.8541, "step": 122690 }, { "epoch": 2.1541810776172334, "grad_norm": 0.0688353112860496, "learning_rate": 0.00012585614915881516, "loss": 0.8527, "step": 122700 }, { "epoch": 2.154356642497235, "grad_norm": 0.07577347980263846, "learning_rate": 0.00012584536231757208, "loss": 0.8402, "step": 122710 }, { "epoch": 2.1545322073772364, "grad_norm": 0.042024828477999994, "learning_rate": 0.00012583457516153346, "loss": 0.8422, "step": 122720 }, { "epoch": 2.154707772257238, "grad_norm": 0.04406056863707316, "learning_rate": 0.00012582378769083606, "loss": 0.8528, "step": 122730 }, { "epoch": 2.154883337137239, "grad_norm": 0.09021554353527346, "learning_rate": 0.0001258129999056165, "loss": 0.844, "step": 122740 }, { "epoch": 2.1550589020172404, "grad_norm": 0.05287361083521872, "learning_rate": 0.00012580221180601148, "loss": 0.8474, "step": 122750 }, { "epoch": 2.155234466897242, "grad_norm": 0.05473318537889592, "learning_rate": 0.00012579142339215767, "loss": 0.8539, "step": 122760 }, { "epoch": 2.1554100317772433, "grad_norm": 0.053829202439265866, "learning_rate": 0.00012578063466419176, "loss": 0.8438, "step": 122770 }, { "epoch": 2.155585596657245, "grad_norm": 0.0970537609150588, "learning_rate": 0.00012576984562225047, "loss": 0.8526, "step": 122780 }, { "epoch": 2.1557611615372463, "grad_norm": 0.062152059649286634, "learning_rate": 0.00012575905626647051, "loss": 0.8449, "step": 122790 }, { "epoch": 2.1559367264172473, "grad_norm": 0.08194447026435826, "learning_rate": 0.00012574826659698856, "loss": 0.8528, "step": 122800 }, { "epoch": 2.156112291297249, "grad_norm": 0.06555298533572194, "learning_rate": 0.0001257374766139413, "loss": 0.8498, "step": 122810 }, { "epoch": 2.1562878561772503, "grad_norm": 0.05450481350545309, "learning_rate": 0.0001257266863174655, "loss": 0.8446, "step": 122820 }, { "epoch": 2.1564634210572518, "grad_norm": 0.06537404862425224, "learning_rate": 0.00012571589570769783, "loss": 0.8438, "step": 122830 }, { "epoch": 2.1566389859372532, "grad_norm": 0.057752259951987864, "learning_rate": 0.00012570510478477503, "loss": 0.8419, "step": 122840 }, { "epoch": 2.1568145508172547, "grad_norm": 0.04934172814569647, "learning_rate": 0.00012569431354883382, "loss": 0.8444, "step": 122850 }, { "epoch": 2.1569901156972557, "grad_norm": 0.059858435091536995, "learning_rate": 0.00012568352200001092, "loss": 0.8547, "step": 122860 }, { "epoch": 2.157165680577257, "grad_norm": 0.0452909810479934, "learning_rate": 0.00012567273013844307, "loss": 0.8492, "step": 122870 }, { "epoch": 2.1573412454572587, "grad_norm": 0.053925688718312534, "learning_rate": 0.000125661937964267, "loss": 0.8478, "step": 122880 }, { "epoch": 2.15751681033726, "grad_norm": 0.045564125432769, "learning_rate": 0.00012565114547761943, "loss": 0.8492, "step": 122890 }, { "epoch": 2.1576923752172616, "grad_norm": 0.04653895956202837, "learning_rate": 0.00012564035267863713, "loss": 0.8452, "step": 122900 }, { "epoch": 2.157867940097263, "grad_norm": 0.06161567557037066, "learning_rate": 0.0001256295595674569, "loss": 0.8445, "step": 122910 }, { "epoch": 2.158043504977264, "grad_norm": 0.06476290698907873, "learning_rate": 0.00012561876614421535, "loss": 0.8474, "step": 122920 }, { "epoch": 2.1582190698572656, "grad_norm": 0.048937156795896865, "learning_rate": 0.00012560797240904935, "loss": 0.8464, "step": 122930 }, { "epoch": 2.158394634737267, "grad_norm": 0.05086648272963853, "learning_rate": 0.00012559717836209563, "loss": 0.8471, "step": 122940 }, { "epoch": 2.1585701996172686, "grad_norm": 0.04829066574372977, "learning_rate": 0.0001255863840034909, "loss": 0.8536, "step": 122950 }, { "epoch": 2.15874576449727, "grad_norm": 0.05765167798897579, "learning_rate": 0.00012557558933337204, "loss": 0.8404, "step": 122960 }, { "epoch": 2.1589213293772715, "grad_norm": 0.05030253783243984, "learning_rate": 0.0001255647943518757, "loss": 0.8442, "step": 122970 }, { "epoch": 2.1590968942572726, "grad_norm": 0.05767912040180794, "learning_rate": 0.00012555399905913873, "loss": 0.8448, "step": 122980 }, { "epoch": 2.159272459137274, "grad_norm": 0.0637417362077183, "learning_rate": 0.0001255432034552979, "loss": 0.8506, "step": 122990 }, { "epoch": 2.1594480240172755, "grad_norm": 0.08153551978695114, "learning_rate": 0.00012553240754048994, "loss": 0.8497, "step": 123000 }, { "epoch": 2.159623588897277, "grad_norm": 0.06675163498915841, "learning_rate": 0.0001255216113148517, "loss": 0.8464, "step": 123010 }, { "epoch": 2.1597991537772785, "grad_norm": 0.04587275272346365, "learning_rate": 0.00012551081477851994, "loss": 0.8432, "step": 123020 }, { "epoch": 2.15997471865728, "grad_norm": 0.06031215515388103, "learning_rate": 0.00012550001793163148, "loss": 0.8424, "step": 123030 }, { "epoch": 2.160150283537281, "grad_norm": 0.04516047979616498, "learning_rate": 0.00012548922077432306, "loss": 0.8477, "step": 123040 }, { "epoch": 2.1603258484172825, "grad_norm": 0.045989521734667624, "learning_rate": 0.00012547842330673157, "loss": 0.8457, "step": 123050 }, { "epoch": 2.160501413297284, "grad_norm": 0.04883277531094337, "learning_rate": 0.00012546762552899368, "loss": 0.8401, "step": 123060 }, { "epoch": 2.1606769781772854, "grad_norm": 0.049191545954761065, "learning_rate": 0.00012545682744124635, "loss": 0.8497, "step": 123070 }, { "epoch": 2.160852543057287, "grad_norm": 0.045593609210218926, "learning_rate": 0.00012544602904362634, "loss": 0.8453, "step": 123080 }, { "epoch": 2.1610281079372884, "grad_norm": 0.059883756748748095, "learning_rate": 0.00012543523033627042, "loss": 0.8485, "step": 123090 }, { "epoch": 2.1612036728172894, "grad_norm": 0.044976177361046046, "learning_rate": 0.00012542443131931548, "loss": 0.8425, "step": 123100 }, { "epoch": 2.161379237697291, "grad_norm": 0.06184425734460933, "learning_rate": 0.00012541363199289828, "loss": 0.8462, "step": 123110 }, { "epoch": 2.1615548025772924, "grad_norm": 0.05593073168708114, "learning_rate": 0.0001254028323571557, "loss": 0.8429, "step": 123120 }, { "epoch": 2.161730367457294, "grad_norm": 0.06971427864116034, "learning_rate": 0.00012539203241222456, "loss": 0.8482, "step": 123130 }, { "epoch": 2.1619059323372953, "grad_norm": 0.06242565953601442, "learning_rate": 0.0001253812321582417, "loss": 0.8477, "step": 123140 }, { "epoch": 2.162081497217297, "grad_norm": 0.04491850539715488, "learning_rate": 0.00012537043159534396, "loss": 0.8524, "step": 123150 }, { "epoch": 2.1622570620972983, "grad_norm": 0.05563688745234271, "learning_rate": 0.00012535963072366814, "loss": 0.85, "step": 123160 }, { "epoch": 2.1624326269772993, "grad_norm": 0.0611989365859918, "learning_rate": 0.0001253488295433512, "loss": 0.8424, "step": 123170 }, { "epoch": 2.162608191857301, "grad_norm": 0.056068738686743745, "learning_rate": 0.00012533802805452985, "loss": 0.8466, "step": 123180 }, { "epoch": 2.1627837567373023, "grad_norm": 0.057269354489344754, "learning_rate": 0.00012532722625734107, "loss": 0.8478, "step": 123190 }, { "epoch": 2.1629593216173038, "grad_norm": 0.06369443027272213, "learning_rate": 0.00012531642415192167, "loss": 0.8435, "step": 123200 }, { "epoch": 2.1631348864973052, "grad_norm": 0.06505753874853729, "learning_rate": 0.00012530562173840849, "loss": 0.8398, "step": 123210 }, { "epoch": 2.1633104513773063, "grad_norm": 0.05082455639676722, "learning_rate": 0.00012529481901693844, "loss": 0.8372, "step": 123220 }, { "epoch": 2.1634860162573077, "grad_norm": 0.04854103658516852, "learning_rate": 0.00012528401598764833, "loss": 0.8508, "step": 123230 }, { "epoch": 2.163661581137309, "grad_norm": 0.057505223975851436, "learning_rate": 0.00012527321265067516, "loss": 0.8406, "step": 123240 }, { "epoch": 2.1638371460173107, "grad_norm": 0.06693783669132056, "learning_rate": 0.0001252624090061557, "loss": 0.8504, "step": 123250 }, { "epoch": 2.164012710897312, "grad_norm": 0.07940519119044576, "learning_rate": 0.00012525160505422685, "loss": 0.8357, "step": 123260 }, { "epoch": 2.1641882757773137, "grad_norm": 0.0614080039982501, "learning_rate": 0.00012524080079502555, "loss": 0.8497, "step": 123270 }, { "epoch": 2.164363840657315, "grad_norm": 0.047181637722750726, "learning_rate": 0.00012522999622868863, "loss": 0.8446, "step": 123280 }, { "epoch": 2.164539405537316, "grad_norm": 0.07182754052455327, "learning_rate": 0.00012521919135535304, "loss": 0.849, "step": 123290 }, { "epoch": 2.1647149704173176, "grad_norm": 0.04454859955951449, "learning_rate": 0.00012520838617515565, "loss": 0.848, "step": 123300 }, { "epoch": 2.164890535297319, "grad_norm": 0.06574497102174334, "learning_rate": 0.00012519758068823333, "loss": 0.855, "step": 123310 }, { "epoch": 2.1650661001773206, "grad_norm": 0.06005489652659552, "learning_rate": 0.00012518677489472307, "loss": 0.8474, "step": 123320 }, { "epoch": 2.165241665057322, "grad_norm": 0.05388424570768922, "learning_rate": 0.0001251759687947617, "loss": 0.8455, "step": 123330 }, { "epoch": 2.165417229937323, "grad_norm": 0.059018544713113096, "learning_rate": 0.0001251651623884862, "loss": 0.8424, "step": 123340 }, { "epoch": 2.1655927948173246, "grad_norm": 0.05256314635782841, "learning_rate": 0.00012515435567603345, "loss": 0.8328, "step": 123350 }, { "epoch": 2.165768359697326, "grad_norm": 0.06056426446387385, "learning_rate": 0.00012514354865754038, "loss": 0.8379, "step": 123360 }, { "epoch": 2.1659439245773275, "grad_norm": 0.05477629759328827, "learning_rate": 0.00012513274133314395, "loss": 0.8423, "step": 123370 }, { "epoch": 2.166119489457329, "grad_norm": 0.059053477505771015, "learning_rate": 0.00012512193370298102, "loss": 0.8551, "step": 123380 }, { "epoch": 2.1662950543373305, "grad_norm": 0.06311385385411306, "learning_rate": 0.0001251111257671886, "loss": 0.8394, "step": 123390 }, { "epoch": 2.166470619217332, "grad_norm": 0.05822866871566747, "learning_rate": 0.00012510031752590358, "loss": 0.8476, "step": 123400 }, { "epoch": 2.166646184097333, "grad_norm": 0.06829294346106853, "learning_rate": 0.00012508950897926292, "loss": 0.8487, "step": 123410 }, { "epoch": 2.1668217489773345, "grad_norm": 0.06356539122858655, "learning_rate": 0.0001250787001274036, "loss": 0.8424, "step": 123420 }, { "epoch": 2.166997313857336, "grad_norm": 0.06478674937609141, "learning_rate": 0.00012506789097046247, "loss": 0.8448, "step": 123430 }, { "epoch": 2.1671728787373374, "grad_norm": 0.07808897287527729, "learning_rate": 0.0001250570815085766, "loss": 0.8519, "step": 123440 }, { "epoch": 2.167348443617339, "grad_norm": 0.05433814547957261, "learning_rate": 0.00012504627174188287, "loss": 0.8438, "step": 123450 }, { "epoch": 2.1675240084973404, "grad_norm": 0.0631556568939575, "learning_rate": 0.00012503546167051826, "loss": 0.8491, "step": 123460 }, { "epoch": 2.1676995733773414, "grad_norm": 0.05943117199218949, "learning_rate": 0.00012502465129461977, "loss": 0.8428, "step": 123470 }, { "epoch": 2.167875138257343, "grad_norm": 0.06507241623617935, "learning_rate": 0.00012501384061432434, "loss": 0.8451, "step": 123480 }, { "epoch": 2.1680507031373444, "grad_norm": 0.06361072455034496, "learning_rate": 0.00012500302962976893, "loss": 0.8517, "step": 123490 }, { "epoch": 2.168226268017346, "grad_norm": 0.06402255512225936, "learning_rate": 0.00012499221834109053, "loss": 0.8408, "step": 123500 }, { "epoch": 2.1684018328973473, "grad_norm": 0.05548907026850045, "learning_rate": 0.00012498140674842615, "loss": 0.8518, "step": 123510 }, { "epoch": 2.168577397777349, "grad_norm": 0.07947448772909059, "learning_rate": 0.00012497059485191273, "loss": 0.8436, "step": 123520 }, { "epoch": 2.16875296265735, "grad_norm": 0.045572009436122995, "learning_rate": 0.0001249597826516873, "loss": 0.84, "step": 123530 }, { "epoch": 2.1689285275373513, "grad_norm": 0.07358715312892573, "learning_rate": 0.00012494897014788683, "loss": 0.8442, "step": 123540 }, { "epoch": 2.169104092417353, "grad_norm": 0.06645140275702627, "learning_rate": 0.0001249381573406483, "loss": 0.8386, "step": 123550 }, { "epoch": 2.1692796572973543, "grad_norm": 0.05634174523086861, "learning_rate": 0.00012492734423010876, "loss": 0.8479, "step": 123560 }, { "epoch": 2.1694552221773558, "grad_norm": 0.05573696773519324, "learning_rate": 0.00012491653081640513, "loss": 0.846, "step": 123570 }, { "epoch": 2.1696307870573572, "grad_norm": 0.07663758710610388, "learning_rate": 0.00012490571709967453, "loss": 0.8493, "step": 123580 }, { "epoch": 2.1698063519373583, "grad_norm": 0.04700266760723505, "learning_rate": 0.0001248949030800539, "loss": 0.8463, "step": 123590 }, { "epoch": 2.1699819168173597, "grad_norm": 0.05225052774266155, "learning_rate": 0.00012488408875768023, "loss": 0.8463, "step": 123600 }, { "epoch": 2.1701574816973612, "grad_norm": 0.06193158481254446, "learning_rate": 0.00012487327413269062, "loss": 0.842, "step": 123610 }, { "epoch": 2.1703330465773627, "grad_norm": 0.04697554980432962, "learning_rate": 0.000124862459205222, "loss": 0.8545, "step": 123620 }, { "epoch": 2.170508611457364, "grad_norm": 0.056739524341885776, "learning_rate": 0.0001248516439754115, "loss": 0.8422, "step": 123630 }, { "epoch": 2.1706841763373657, "grad_norm": 0.08742530181387005, "learning_rate": 0.00012484082844339608, "loss": 0.8412, "step": 123640 }, { "epoch": 2.1708597412173667, "grad_norm": 0.07013446733397781, "learning_rate": 0.0001248300126093128, "loss": 0.8526, "step": 123650 }, { "epoch": 2.171035306097368, "grad_norm": 0.05553531618005677, "learning_rate": 0.00012481919647329873, "loss": 0.8492, "step": 123660 }, { "epoch": 2.1712108709773696, "grad_norm": 0.051912876363076794, "learning_rate": 0.0001248083800354908, "loss": 0.8449, "step": 123670 }, { "epoch": 2.171386435857371, "grad_norm": 0.06386722963633953, "learning_rate": 0.00012479756329602617, "loss": 0.8441, "step": 123680 }, { "epoch": 2.1715620007373726, "grad_norm": 0.045878547726121245, "learning_rate": 0.00012478674625504183, "loss": 0.8451, "step": 123690 }, { "epoch": 2.171737565617374, "grad_norm": 0.05947409673084285, "learning_rate": 0.0001247759289126749, "loss": 0.8488, "step": 123700 }, { "epoch": 2.171913130497375, "grad_norm": 0.05373187644567895, "learning_rate": 0.0001247651112690624, "loss": 0.8344, "step": 123710 }, { "epoch": 2.1720886953773766, "grad_norm": 0.06487902252994918, "learning_rate": 0.00012475429332434133, "loss": 0.841, "step": 123720 }, { "epoch": 2.172264260257378, "grad_norm": 0.05693690294324708, "learning_rate": 0.00012474347507864883, "loss": 0.8466, "step": 123730 }, { "epoch": 2.1724398251373795, "grad_norm": 0.06380406904541203, "learning_rate": 0.00012473265653212195, "loss": 0.8399, "step": 123740 }, { "epoch": 2.172615390017381, "grad_norm": 0.04708442362705157, "learning_rate": 0.0001247218376848978, "loss": 0.846, "step": 123750 }, { "epoch": 2.1727909548973825, "grad_norm": 0.05921448394526895, "learning_rate": 0.00012471101853711336, "loss": 0.8486, "step": 123760 }, { "epoch": 2.1729665197773835, "grad_norm": 0.06391038415863211, "learning_rate": 0.0001247001990889058, "loss": 0.845, "step": 123770 }, { "epoch": 2.173142084657385, "grad_norm": 0.09526912248201307, "learning_rate": 0.0001246893793404122, "loss": 0.8481, "step": 123780 }, { "epoch": 2.1733176495373865, "grad_norm": 0.0519164137466698, "learning_rate": 0.00012467855929176958, "loss": 0.8416, "step": 123790 }, { "epoch": 2.173493214417388, "grad_norm": 0.058377102564079186, "learning_rate": 0.00012466773894311513, "loss": 0.8476, "step": 123800 }, { "epoch": 2.1736687792973894, "grad_norm": 0.05677061349491721, "learning_rate": 0.00012465691829458583, "loss": 0.846, "step": 123810 }, { "epoch": 2.173844344177391, "grad_norm": 0.05369148146952836, "learning_rate": 0.0001246460973463189, "loss": 0.8518, "step": 123820 }, { "epoch": 2.174019909057392, "grad_norm": 0.05165790745881904, "learning_rate": 0.00012463527609845134, "loss": 0.8444, "step": 123830 }, { "epoch": 2.1741954739373934, "grad_norm": 0.060151186607304834, "learning_rate": 0.0001246244545511203, "loss": 0.843, "step": 123840 }, { "epoch": 2.174371038817395, "grad_norm": 0.06910895875769794, "learning_rate": 0.0001246136327044629, "loss": 0.8429, "step": 123850 }, { "epoch": 2.1745466036973964, "grad_norm": 0.056964762466887454, "learning_rate": 0.0001246028105586163, "loss": 0.8406, "step": 123860 }, { "epoch": 2.174722168577398, "grad_norm": 0.0542786903149581, "learning_rate": 0.00012459198811371752, "loss": 0.8513, "step": 123870 }, { "epoch": 2.1748977334573993, "grad_norm": 0.08090676072541725, "learning_rate": 0.00012458116536990374, "loss": 0.8434, "step": 123880 }, { "epoch": 2.1750732983374004, "grad_norm": 0.14160801463940786, "learning_rate": 0.00012457034232731207, "loss": 0.8471, "step": 123890 }, { "epoch": 2.175248863217402, "grad_norm": 0.07653226554955579, "learning_rate": 0.00012455951898607961, "loss": 0.8521, "step": 123900 }, { "epoch": 2.1754244280974033, "grad_norm": 0.05255207404237297, "learning_rate": 0.00012454869534634356, "loss": 0.8435, "step": 123910 }, { "epoch": 2.175599992977405, "grad_norm": 0.06543145640924555, "learning_rate": 0.00012453787140824103, "loss": 0.8497, "step": 123920 }, { "epoch": 2.1757755578574063, "grad_norm": 0.07256036919523151, "learning_rate": 0.00012452704717190918, "loss": 0.8552, "step": 123930 }, { "epoch": 2.1759511227374078, "grad_norm": 0.04723772983124345, "learning_rate": 0.0001245162226374851, "loss": 0.8472, "step": 123940 }, { "epoch": 2.1761266876174092, "grad_norm": 0.057120876966958285, "learning_rate": 0.00012450539780510598, "loss": 0.8441, "step": 123950 }, { "epoch": 2.1763022524974103, "grad_norm": 0.05362422174651468, "learning_rate": 0.000124494572674909, "loss": 0.8489, "step": 123960 }, { "epoch": 2.1764778173774117, "grad_norm": 0.07465561882241571, "learning_rate": 0.00012448374724703125, "loss": 0.8478, "step": 123970 }, { "epoch": 2.1766533822574132, "grad_norm": 0.045602415122536204, "learning_rate": 0.00012447292152160993, "loss": 0.8451, "step": 123980 }, { "epoch": 2.1768289471374147, "grad_norm": 0.04607392409797833, "learning_rate": 0.00012446209549878218, "loss": 0.8443, "step": 123990 }, { "epoch": 2.177004512017416, "grad_norm": 0.04890620727085508, "learning_rate": 0.0001244512691786852, "loss": 0.8475, "step": 124000 }, { "epoch": 2.177180076897417, "grad_norm": 0.054246675579755675, "learning_rate": 0.00012444044256145612, "loss": 0.8509, "step": 124010 }, { "epoch": 2.1773556417774187, "grad_norm": 0.04379280732011523, "learning_rate": 0.00012442961564723218, "loss": 0.8504, "step": 124020 }, { "epoch": 2.17753120665742, "grad_norm": 0.05093156004712589, "learning_rate": 0.00012441878843615046, "loss": 0.8582, "step": 124030 }, { "epoch": 2.1777067715374216, "grad_norm": 0.053671132305850695, "learning_rate": 0.00012440796092834828, "loss": 0.85, "step": 124040 }, { "epoch": 2.177882336417423, "grad_norm": 0.06431349002105705, "learning_rate": 0.0001243971331239627, "loss": 0.8478, "step": 124050 }, { "epoch": 2.1780579012974246, "grad_norm": 0.04986083115892917, "learning_rate": 0.00012438630502313095, "loss": 0.8486, "step": 124060 }, { "epoch": 2.178233466177426, "grad_norm": 0.05362001593394558, "learning_rate": 0.00012437547662599028, "loss": 0.8502, "step": 124070 }, { "epoch": 2.178409031057427, "grad_norm": 0.06353331901311271, "learning_rate": 0.0001243646479326778, "loss": 0.8417, "step": 124080 }, { "epoch": 2.1785845959374286, "grad_norm": 0.056419079152094306, "learning_rate": 0.00012435381894333075, "loss": 0.8449, "step": 124090 }, { "epoch": 2.17876016081743, "grad_norm": 0.07978454010103818, "learning_rate": 0.00012434298965808638, "loss": 0.8487, "step": 124100 }, { "epoch": 2.1789357256974315, "grad_norm": 0.048076991343196845, "learning_rate": 0.00012433216007708183, "loss": 0.8496, "step": 124110 }, { "epoch": 2.179111290577433, "grad_norm": 0.04703085929474038, "learning_rate": 0.00012432133020045435, "loss": 0.8498, "step": 124120 }, { "epoch": 2.1792868554574345, "grad_norm": 0.057300506462325144, "learning_rate": 0.00012431050002834111, "loss": 0.8524, "step": 124130 }, { "epoch": 2.1794624203374355, "grad_norm": 0.05639639576668472, "learning_rate": 0.00012429966956087942, "loss": 0.8466, "step": 124140 }, { "epoch": 2.179637985217437, "grad_norm": 0.04696763244393638, "learning_rate": 0.00012428883879820644, "loss": 0.8525, "step": 124150 }, { "epoch": 2.1798135500974385, "grad_norm": 0.06309630483796173, "learning_rate": 0.0001242780077404594, "loss": 0.851, "step": 124160 }, { "epoch": 2.17998911497744, "grad_norm": 0.06435072369298316, "learning_rate": 0.00012426717638777551, "loss": 0.8395, "step": 124170 }, { "epoch": 2.1801646798574414, "grad_norm": 0.09548616792871292, "learning_rate": 0.00012425634474029206, "loss": 0.8451, "step": 124180 }, { "epoch": 2.180340244737443, "grad_norm": 0.052627675024609744, "learning_rate": 0.00012424551279814627, "loss": 0.8521, "step": 124190 }, { "epoch": 2.180515809617444, "grad_norm": 0.04863765858051558, "learning_rate": 0.00012423468056147537, "loss": 0.8552, "step": 124200 }, { "epoch": 2.1806913744974454, "grad_norm": 0.06982796803111618, "learning_rate": 0.00012422384803041663, "loss": 0.8478, "step": 124210 }, { "epoch": 2.180866939377447, "grad_norm": 0.04835613608075363, "learning_rate": 0.0001242130152051073, "loss": 0.8472, "step": 124220 }, { "epoch": 2.1810425042574484, "grad_norm": 0.04815058462228203, "learning_rate": 0.00012420218208568457, "loss": 0.8471, "step": 124230 }, { "epoch": 2.18121806913745, "grad_norm": 0.0544713681628411, "learning_rate": 0.00012419134867228576, "loss": 0.8489, "step": 124240 }, { "epoch": 2.1813936340174513, "grad_norm": 0.0484822876831519, "learning_rate": 0.00012418051496504814, "loss": 0.8434, "step": 124250 }, { "epoch": 2.1815691988974524, "grad_norm": 0.06111312864960844, "learning_rate": 0.00012416968096410895, "loss": 0.844, "step": 124260 }, { "epoch": 2.181744763777454, "grad_norm": 0.06878806977171939, "learning_rate": 0.00012415884666960546, "loss": 0.8455, "step": 124270 }, { "epoch": 2.1819203286574553, "grad_norm": 0.0438092388533409, "learning_rate": 0.0001241480120816749, "loss": 0.8561, "step": 124280 }, { "epoch": 2.182095893537457, "grad_norm": 0.05533468143341329, "learning_rate": 0.00012413717720045466, "loss": 0.8478, "step": 124290 }, { "epoch": 2.1822714584174583, "grad_norm": 0.042214065010058775, "learning_rate": 0.00012412634202608192, "loss": 0.8478, "step": 124300 }, { "epoch": 2.1824470232974598, "grad_norm": 0.08879199941465796, "learning_rate": 0.00012411550655869397, "loss": 0.8496, "step": 124310 }, { "epoch": 2.182622588177461, "grad_norm": 0.06221116538184695, "learning_rate": 0.00012410467079842816, "loss": 0.8423, "step": 124320 }, { "epoch": 2.1827981530574623, "grad_norm": 0.07666458266636257, "learning_rate": 0.00012409383474542172, "loss": 0.844, "step": 124330 }, { "epoch": 2.1829737179374638, "grad_norm": 0.07460389627093264, "learning_rate": 0.00012408299839981196, "loss": 0.8494, "step": 124340 }, { "epoch": 2.1831492828174652, "grad_norm": 0.05543471257627494, "learning_rate": 0.00012407216176173622, "loss": 0.8436, "step": 124350 }, { "epoch": 2.1833248476974667, "grad_norm": 0.06106819196889984, "learning_rate": 0.00012406132483133174, "loss": 0.8388, "step": 124360 }, { "epoch": 2.183500412577468, "grad_norm": 0.06775570594240572, "learning_rate": 0.00012405048760873587, "loss": 0.8461, "step": 124370 }, { "epoch": 2.183675977457469, "grad_norm": 0.05598915062536177, "learning_rate": 0.0001240396500940859, "loss": 0.8519, "step": 124380 }, { "epoch": 2.1838515423374707, "grad_norm": 0.05491776060283023, "learning_rate": 0.00012402881228751916, "loss": 0.8518, "step": 124390 }, { "epoch": 2.184027107217472, "grad_norm": 0.05716590290636651, "learning_rate": 0.00012401797418917294, "loss": 0.8405, "step": 124400 }, { "epoch": 2.1842026720974737, "grad_norm": 0.11126827939467303, "learning_rate": 0.00012400713579918457, "loss": 0.8385, "step": 124410 }, { "epoch": 2.184378236977475, "grad_norm": 0.048945369557086546, "learning_rate": 0.0001239962971176914, "loss": 0.8508, "step": 124420 }, { "epoch": 2.1845538018574766, "grad_norm": 0.0733136288547381, "learning_rate": 0.0001239854581448307, "loss": 0.8518, "step": 124430 }, { "epoch": 2.1847293667374776, "grad_norm": 0.04949587556250827, "learning_rate": 0.0001239746188807399, "loss": 0.857, "step": 124440 }, { "epoch": 2.184904931617479, "grad_norm": 0.08384599385636037, "learning_rate": 0.00012396377932555623, "loss": 0.8504, "step": 124450 }, { "epoch": 2.1850804964974806, "grad_norm": 0.05707377086070871, "learning_rate": 0.0001239529394794171, "loss": 0.8367, "step": 124460 }, { "epoch": 2.185256061377482, "grad_norm": 0.053669842841630226, "learning_rate": 0.00012394209934245978, "loss": 0.8483, "step": 124470 }, { "epoch": 2.1854316262574836, "grad_norm": 0.05230729913594521, "learning_rate": 0.00012393125891482171, "loss": 0.8443, "step": 124480 }, { "epoch": 2.185607191137485, "grad_norm": 0.0533293088886247, "learning_rate": 0.0001239204181966402, "loss": 0.8406, "step": 124490 }, { "epoch": 2.185782756017486, "grad_norm": 0.054164319471502784, "learning_rate": 0.00012390957718805257, "loss": 0.8403, "step": 124500 }, { "epoch": 2.1859583208974875, "grad_norm": 0.06204044833086338, "learning_rate": 0.00012389873588919623, "loss": 0.8388, "step": 124510 }, { "epoch": 2.186133885777489, "grad_norm": 0.06683614347790409, "learning_rate": 0.00012388789430020847, "loss": 0.8476, "step": 124520 }, { "epoch": 2.1863094506574905, "grad_norm": 0.06120919276672839, "learning_rate": 0.00012387705242122675, "loss": 0.8439, "step": 124530 }, { "epoch": 2.186485015537492, "grad_norm": 0.06109698335312943, "learning_rate": 0.00012386621025238837, "loss": 0.8409, "step": 124540 }, { "epoch": 2.1866605804174934, "grad_norm": 0.06959552684227063, "learning_rate": 0.0001238553677938308, "loss": 0.841, "step": 124550 }, { "epoch": 2.1868361452974945, "grad_norm": 0.0524760032244521, "learning_rate": 0.00012384452504569127, "loss": 0.8455, "step": 124560 }, { "epoch": 2.187011710177496, "grad_norm": 0.05884108742890781, "learning_rate": 0.0001238336820081072, "loss": 0.8399, "step": 124570 }, { "epoch": 2.1871872750574974, "grad_norm": 0.06346271336278474, "learning_rate": 0.00012382283868121605, "loss": 0.8464, "step": 124580 }, { "epoch": 2.187362839937499, "grad_norm": 0.057504717273171455, "learning_rate": 0.00012381199506515512, "loss": 0.8426, "step": 124590 }, { "epoch": 2.1875384048175004, "grad_norm": 0.06529070901163926, "learning_rate": 0.0001238011511600619, "loss": 0.8454, "step": 124600 }, { "epoch": 2.187713969697502, "grad_norm": 0.06331793044008517, "learning_rate": 0.0001237903069660737, "loss": 0.8422, "step": 124610 }, { "epoch": 2.1878895345775033, "grad_norm": 0.06832443647920729, "learning_rate": 0.00012377946248332792, "loss": 0.8494, "step": 124620 }, { "epoch": 2.1880650994575044, "grad_norm": 0.048776861731735156, "learning_rate": 0.00012376861771196202, "loss": 0.8386, "step": 124630 }, { "epoch": 2.188240664337506, "grad_norm": 0.07169812032552944, "learning_rate": 0.00012375777265211333, "loss": 0.8498, "step": 124640 }, { "epoch": 2.1884162292175073, "grad_norm": 0.07225922269930124, "learning_rate": 0.00012374692730391932, "loss": 0.8465, "step": 124650 }, { "epoch": 2.188591794097509, "grad_norm": 0.04104672223341215, "learning_rate": 0.0001237360816675174, "loss": 0.8451, "step": 124660 }, { "epoch": 2.1887673589775103, "grad_norm": 0.05695683053575399, "learning_rate": 0.00012372523574304494, "loss": 0.8494, "step": 124670 }, { "epoch": 2.1889429238575113, "grad_norm": 0.04816120329970796, "learning_rate": 0.0001237143895306394, "loss": 0.8489, "step": 124680 }, { "epoch": 2.189118488737513, "grad_norm": 0.04423390303753773, "learning_rate": 0.00012370354303043815, "loss": 0.8408, "step": 124690 }, { "epoch": 2.1892940536175143, "grad_norm": 0.05406996108359208, "learning_rate": 0.00012369269624257873, "loss": 0.8513, "step": 124700 }, { "epoch": 2.1894696184975158, "grad_norm": 0.05862277206954831, "learning_rate": 0.00012368184916719844, "loss": 0.8485, "step": 124710 }, { "epoch": 2.1896451833775172, "grad_norm": 0.05772796682316606, "learning_rate": 0.00012367100180443483, "loss": 0.853, "step": 124720 }, { "epoch": 2.1898207482575187, "grad_norm": 0.050587670555889926, "learning_rate": 0.00012366015415442525, "loss": 0.8416, "step": 124730 }, { "epoch": 2.18999631313752, "grad_norm": 0.07073559777962278, "learning_rate": 0.00012364930621730716, "loss": 0.8494, "step": 124740 }, { "epoch": 2.1901718780175212, "grad_norm": 0.05871586037914789, "learning_rate": 0.00012363845799321806, "loss": 0.8389, "step": 124750 }, { "epoch": 2.1903474428975227, "grad_norm": 0.05730734696712359, "learning_rate": 0.0001236276094822953, "loss": 0.8347, "step": 124760 }, { "epoch": 2.190523007777524, "grad_norm": 0.05996872474304952, "learning_rate": 0.00012361676068467644, "loss": 0.8444, "step": 124770 }, { "epoch": 2.1906985726575257, "grad_norm": 0.0518479738809413, "learning_rate": 0.00012360591160049888, "loss": 0.8385, "step": 124780 }, { "epoch": 2.190874137537527, "grad_norm": 0.05952806614851918, "learning_rate": 0.00012359506222990008, "loss": 0.8276, "step": 124790 }, { "epoch": 2.191049702417528, "grad_norm": 0.05852579679051964, "learning_rate": 0.00012358421257301753, "loss": 0.8469, "step": 124800 }, { "epoch": 2.1912252672975296, "grad_norm": 0.04848725955274247, "learning_rate": 0.00012357336262998865, "loss": 0.8453, "step": 124810 }, { "epoch": 2.191400832177531, "grad_norm": 0.045949866241660114, "learning_rate": 0.00012356251240095093, "loss": 0.8495, "step": 124820 }, { "epoch": 2.1915763970575326, "grad_norm": 0.06354117190313383, "learning_rate": 0.00012355166188604192, "loss": 0.8475, "step": 124830 }, { "epoch": 2.191751961937534, "grad_norm": 0.05381139733017475, "learning_rate": 0.00012354081108539896, "loss": 0.8432, "step": 124840 }, { "epoch": 2.1919275268175356, "grad_norm": 0.06361854333189433, "learning_rate": 0.00012352995999915964, "loss": 0.8437, "step": 124850 }, { "epoch": 2.192103091697537, "grad_norm": 0.058228716539792356, "learning_rate": 0.00012351910862746138, "loss": 0.8388, "step": 124860 }, { "epoch": 2.192278656577538, "grad_norm": 0.07456602380192723, "learning_rate": 0.00012350825697044174, "loss": 0.849, "step": 124870 }, { "epoch": 2.1924542214575395, "grad_norm": 0.061739578946245655, "learning_rate": 0.00012349740502823814, "loss": 0.8459, "step": 124880 }, { "epoch": 2.192629786337541, "grad_norm": 0.07450989205189124, "learning_rate": 0.0001234865528009881, "loss": 0.8419, "step": 124890 }, { "epoch": 2.1928053512175425, "grad_norm": 0.06421089159694598, "learning_rate": 0.00012347570028882916, "loss": 0.847, "step": 124900 }, { "epoch": 2.192980916097544, "grad_norm": 0.05643411795373679, "learning_rate": 0.00012346484749189876, "loss": 0.8434, "step": 124910 }, { "epoch": 2.1931564809775455, "grad_norm": 0.07492104307522619, "learning_rate": 0.00012345399441033447, "loss": 0.844, "step": 124920 }, { "epoch": 2.1933320458575465, "grad_norm": 0.07521773989896895, "learning_rate": 0.00012344314104427372, "loss": 0.8525, "step": 124930 }, { "epoch": 2.193507610737548, "grad_norm": 0.044729428391283604, "learning_rate": 0.0001234322873938541, "loss": 0.8461, "step": 124940 }, { "epoch": 2.1936831756175494, "grad_norm": 0.04876994067571262, "learning_rate": 0.0001234214334592131, "loss": 0.8522, "step": 124950 }, { "epoch": 2.193858740497551, "grad_norm": 0.04506581021993961, "learning_rate": 0.00012341057924048822, "loss": 0.8506, "step": 124960 }, { "epoch": 2.1940343053775524, "grad_norm": 0.04979531119707928, "learning_rate": 0.00012339972473781704, "loss": 0.8443, "step": 124970 }, { "epoch": 2.194209870257554, "grad_norm": 0.15702013656830915, "learning_rate": 0.00012338886995133703, "loss": 0.8499, "step": 124980 }, { "epoch": 2.194385435137555, "grad_norm": 0.053146515720667364, "learning_rate": 0.00012337801488118574, "loss": 0.8371, "step": 124990 }, { "epoch": 2.1945610000175564, "grad_norm": 0.061224050383658496, "learning_rate": 0.00012336715952750076, "loss": 0.8515, "step": 125000 }, { "epoch": 2.194736564897558, "grad_norm": 0.06629138656234636, "learning_rate": 0.00012335630389041953, "loss": 0.8479, "step": 125010 }, { "epoch": 2.1949121297775593, "grad_norm": 0.05300960359027961, "learning_rate": 0.0001233454479700797, "loss": 0.8487, "step": 125020 }, { "epoch": 2.195087694657561, "grad_norm": 0.055743151249398354, "learning_rate": 0.0001233345917666187, "loss": 0.8415, "step": 125030 }, { "epoch": 2.1952632595375623, "grad_norm": 0.059800211743205205, "learning_rate": 0.00012332373528017417, "loss": 0.8484, "step": 125040 }, { "epoch": 2.1954388244175633, "grad_norm": 0.06961328300368419, "learning_rate": 0.00012331287851088363, "loss": 0.8541, "step": 125050 }, { "epoch": 2.195614389297565, "grad_norm": 0.050941711498586346, "learning_rate": 0.00012330202145888466, "loss": 0.8456, "step": 125060 }, { "epoch": 2.1957899541775663, "grad_norm": 0.0497845061241817, "learning_rate": 0.0001232911641243148, "loss": 0.8431, "step": 125070 }, { "epoch": 2.1959655190575678, "grad_norm": 0.06516888247510592, "learning_rate": 0.00012328030650731158, "loss": 0.8441, "step": 125080 }, { "epoch": 2.1961410839375692, "grad_norm": 0.056651977927015595, "learning_rate": 0.00012326944860801265, "loss": 0.8525, "step": 125090 }, { "epoch": 2.1963166488175707, "grad_norm": 0.05260301829185189, "learning_rate": 0.00012325859042655552, "loss": 0.8458, "step": 125100 }, { "epoch": 2.1964922136975717, "grad_norm": 0.051478051535946284, "learning_rate": 0.0001232477319630778, "loss": 0.8503, "step": 125110 }, { "epoch": 2.1966677785775732, "grad_norm": 0.06019713037447807, "learning_rate": 0.00012323687321771703, "loss": 0.8439, "step": 125120 }, { "epoch": 2.1968433434575747, "grad_norm": 0.04970495064692608, "learning_rate": 0.0001232260141906108, "loss": 0.8431, "step": 125130 }, { "epoch": 2.197018908337576, "grad_norm": 0.05441628837102619, "learning_rate": 0.00012321515488189678, "loss": 0.8441, "step": 125140 }, { "epoch": 2.1971944732175777, "grad_norm": 0.049183790723191206, "learning_rate": 0.00012320429529171244, "loss": 0.8456, "step": 125150 }, { "epoch": 2.197370038097579, "grad_norm": 0.05617163181709665, "learning_rate": 0.00012319343542019543, "loss": 0.8429, "step": 125160 }, { "epoch": 2.19754560297758, "grad_norm": 0.06751041642333382, "learning_rate": 0.00012318257526748334, "loss": 0.8455, "step": 125170 }, { "epoch": 2.1977211678575816, "grad_norm": 0.07411392524987319, "learning_rate": 0.00012317171483371375, "loss": 0.8404, "step": 125180 }, { "epoch": 2.197896732737583, "grad_norm": 0.0527861840285676, "learning_rate": 0.00012316085411902432, "loss": 0.8453, "step": 125190 }, { "epoch": 2.1980722976175846, "grad_norm": 0.06590777969665389, "learning_rate": 0.00012314999312355258, "loss": 0.8389, "step": 125200 }, { "epoch": 2.198247862497586, "grad_norm": 0.0599513115938885, "learning_rate": 0.00012313913184743624, "loss": 0.8405, "step": 125210 }, { "epoch": 2.1984234273775876, "grad_norm": 0.06106611470021757, "learning_rate": 0.0001231282702908128, "loss": 0.8453, "step": 125220 }, { "epoch": 2.1985989922575886, "grad_norm": 0.10117208943172988, "learning_rate": 0.00012311740845381995, "loss": 0.8503, "step": 125230 }, { "epoch": 2.19877455713759, "grad_norm": 0.05165086137557909, "learning_rate": 0.00012310654633659532, "loss": 0.8459, "step": 125240 }, { "epoch": 2.1989501220175915, "grad_norm": 0.09034258197437707, "learning_rate": 0.00012309568393927648, "loss": 0.8464, "step": 125250 }, { "epoch": 2.199125686897593, "grad_norm": 0.06317236885176475, "learning_rate": 0.0001230848212620011, "loss": 0.84, "step": 125260 }, { "epoch": 2.1993012517775945, "grad_norm": 0.08407934057377935, "learning_rate": 0.0001230739583049068, "loss": 0.8467, "step": 125270 }, { "epoch": 2.199476816657596, "grad_norm": 0.061975442945358526, "learning_rate": 0.00012306309506813126, "loss": 0.8496, "step": 125280 }, { "epoch": 2.199652381537597, "grad_norm": 0.06781003575958125, "learning_rate": 0.00012305223155181203, "loss": 0.8419, "step": 125290 }, { "epoch": 2.1998279464175985, "grad_norm": 0.061315674002411766, "learning_rate": 0.0001230413677560868, "loss": 0.848, "step": 125300 }, { "epoch": 2.2000035112976, "grad_norm": 0.06567647760789999, "learning_rate": 0.00012303050368109325, "loss": 0.8443, "step": 125310 }, { "epoch": 2.2001790761776014, "grad_norm": 0.05733471971736188, "learning_rate": 0.00012301963932696895, "loss": 0.8478, "step": 125320 }, { "epoch": 2.200354641057603, "grad_norm": 0.0573010098286156, "learning_rate": 0.00012300877469385163, "loss": 0.8449, "step": 125330 }, { "epoch": 2.2005302059376044, "grad_norm": 0.05747132760890244, "learning_rate": 0.00012299790978187892, "loss": 0.8463, "step": 125340 }, { "epoch": 2.2007057708176054, "grad_norm": 0.0648700490782898, "learning_rate": 0.00012298704459118846, "loss": 0.8466, "step": 125350 }, { "epoch": 2.200881335697607, "grad_norm": 0.057046422741351084, "learning_rate": 0.00012297617912191794, "loss": 0.8318, "step": 125360 }, { "epoch": 2.2010569005776084, "grad_norm": 0.06687825196909523, "learning_rate": 0.000122965313374205, "loss": 0.8435, "step": 125370 }, { "epoch": 2.20123246545761, "grad_norm": 0.05219632397206237, "learning_rate": 0.00012295444734818737, "loss": 0.8529, "step": 125380 }, { "epoch": 2.2014080303376113, "grad_norm": 0.07331635524076144, "learning_rate": 0.00012294358104400266, "loss": 0.8401, "step": 125390 }, { "epoch": 2.201583595217613, "grad_norm": 0.05662271633042301, "learning_rate": 0.00012293271446178855, "loss": 0.8446, "step": 125400 }, { "epoch": 2.2017591600976143, "grad_norm": 0.05539631324940245, "learning_rate": 0.00012292184760168278, "loss": 0.8474, "step": 125410 }, { "epoch": 2.2019347249776153, "grad_norm": 0.05393086095220049, "learning_rate": 0.00012291098046382298, "loss": 0.8447, "step": 125420 }, { "epoch": 2.202110289857617, "grad_norm": 0.06558529740814115, "learning_rate": 0.00012290011304834687, "loss": 0.8432, "step": 125430 }, { "epoch": 2.2022858547376183, "grad_norm": 0.05036473399127462, "learning_rate": 0.0001228892453553921, "loss": 0.843, "step": 125440 }, { "epoch": 2.2024614196176198, "grad_norm": 0.06128331735382623, "learning_rate": 0.00012287837738509646, "loss": 0.8403, "step": 125450 }, { "epoch": 2.2026369844976212, "grad_norm": 0.0821845996812044, "learning_rate": 0.00012286750913759756, "loss": 0.8372, "step": 125460 }, { "epoch": 2.2028125493776223, "grad_norm": 0.05990086243924384, "learning_rate": 0.0001228566406130331, "loss": 0.8488, "step": 125470 }, { "epoch": 2.2029881142576238, "grad_norm": 0.07698032749352471, "learning_rate": 0.00012284577181154085, "loss": 0.8456, "step": 125480 }, { "epoch": 2.2031636791376252, "grad_norm": 0.048995946593386575, "learning_rate": 0.00012283490273325845, "loss": 0.8415, "step": 125490 }, { "epoch": 2.2033392440176267, "grad_norm": 0.050733362123132876, "learning_rate": 0.00012282403337832365, "loss": 0.8474, "step": 125500 }, { "epoch": 2.203514808897628, "grad_norm": 0.07306194442978454, "learning_rate": 0.0001228131637468742, "loss": 0.8412, "step": 125510 }, { "epoch": 2.2036903737776297, "grad_norm": 0.06245423144868065, "learning_rate": 0.00012280229383904777, "loss": 0.8412, "step": 125520 }, { "epoch": 2.203865938657631, "grad_norm": 0.05511804509860098, "learning_rate": 0.00012279142365498208, "loss": 0.8431, "step": 125530 }, { "epoch": 2.204041503537632, "grad_norm": 0.04622685627168382, "learning_rate": 0.00012278055319481487, "loss": 0.846, "step": 125540 }, { "epoch": 2.2042170684176337, "grad_norm": 0.07082454472505893, "learning_rate": 0.00012276968245868391, "loss": 0.834, "step": 125550 }, { "epoch": 2.204392633297635, "grad_norm": 0.046995905838768615, "learning_rate": 0.0001227588114467269, "loss": 0.8505, "step": 125560 }, { "epoch": 2.2045681981776366, "grad_norm": 0.07278887090022329, "learning_rate": 0.00012274794015908156, "loss": 0.8504, "step": 125570 }, { "epoch": 2.204743763057638, "grad_norm": 0.06551369508671201, "learning_rate": 0.00012273706859588569, "loss": 0.8445, "step": 125580 }, { "epoch": 2.2049193279376396, "grad_norm": 0.07609966660076732, "learning_rate": 0.00012272619675727692, "loss": 0.8418, "step": 125590 }, { "epoch": 2.2050948928176406, "grad_norm": 0.06689123011749996, "learning_rate": 0.00012271532464339313, "loss": 0.8467, "step": 125600 }, { "epoch": 2.205270457697642, "grad_norm": 0.04210652157157535, "learning_rate": 0.00012270445225437198, "loss": 0.8484, "step": 125610 }, { "epoch": 2.2054460225776436, "grad_norm": 0.04956489980164874, "learning_rate": 0.00012269357959035127, "loss": 0.8466, "step": 125620 }, { "epoch": 2.205621587457645, "grad_norm": 0.06518604230397582, "learning_rate": 0.0001226827066514688, "loss": 0.8371, "step": 125630 }, { "epoch": 2.2057971523376465, "grad_norm": 0.04965756110611197, "learning_rate": 0.00012267183343786221, "loss": 0.8463, "step": 125640 }, { "epoch": 2.205972717217648, "grad_norm": 0.05862716575130776, "learning_rate": 0.00012266095994966936, "loss": 0.8424, "step": 125650 }, { "epoch": 2.206148282097649, "grad_norm": 0.04056365706950594, "learning_rate": 0.00012265008618702803, "loss": 0.8484, "step": 125660 }, { "epoch": 2.2063238469776505, "grad_norm": 0.04750040146834537, "learning_rate": 0.00012263921215007592, "loss": 0.8433, "step": 125670 }, { "epoch": 2.206499411857652, "grad_norm": 0.05971387613135431, "learning_rate": 0.00012262833783895086, "loss": 0.8413, "step": 125680 }, { "epoch": 2.2066749767376534, "grad_norm": 0.05490319039989867, "learning_rate": 0.00012261746325379063, "loss": 0.8453, "step": 125690 }, { "epoch": 2.206850541617655, "grad_norm": 0.07802674220132272, "learning_rate": 0.00012260658839473296, "loss": 0.8491, "step": 125700 }, { "epoch": 2.2070261064976564, "grad_norm": 0.05672089388712474, "learning_rate": 0.00012259571326191568, "loss": 0.8463, "step": 125710 }, { "epoch": 2.2072016713776574, "grad_norm": 0.06635830308129866, "learning_rate": 0.0001225848378554766, "loss": 0.85, "step": 125720 }, { "epoch": 2.207377236257659, "grad_norm": 0.05346426411813844, "learning_rate": 0.00012257396217555349, "loss": 0.8458, "step": 125730 }, { "epoch": 2.2075528011376604, "grad_norm": 0.05256035196097201, "learning_rate": 0.00012256308622228413, "loss": 0.8526, "step": 125740 }, { "epoch": 2.207728366017662, "grad_norm": 0.061090920080678165, "learning_rate": 0.0001225522099958063, "loss": 0.8453, "step": 125750 }, { "epoch": 2.2079039308976633, "grad_norm": 0.06083036814929553, "learning_rate": 0.00012254133349625786, "loss": 0.8545, "step": 125760 }, { "epoch": 2.208079495777665, "grad_norm": 0.054618645146653017, "learning_rate": 0.00012253045672377664, "loss": 0.8369, "step": 125770 }, { "epoch": 2.208255060657666, "grad_norm": 0.055949130011165284, "learning_rate": 0.00012251957967850037, "loss": 0.8446, "step": 125780 }, { "epoch": 2.2084306255376673, "grad_norm": 0.0460511466644566, "learning_rate": 0.0001225087023605669, "loss": 0.8465, "step": 125790 }, { "epoch": 2.208606190417669, "grad_norm": 0.05387253807842644, "learning_rate": 0.00012249782477011408, "loss": 0.8438, "step": 125800 }, { "epoch": 2.2087817552976703, "grad_norm": 0.058319548848021166, "learning_rate": 0.00012248694690727966, "loss": 0.8475, "step": 125810 }, { "epoch": 2.2089573201776718, "grad_norm": 0.04787920684208883, "learning_rate": 0.00012247606877220153, "loss": 0.8497, "step": 125820 }, { "epoch": 2.2091328850576732, "grad_norm": 0.06443302738764249, "learning_rate": 0.00012246519036501746, "loss": 0.8438, "step": 125830 }, { "epoch": 2.2093084499376743, "grad_norm": 0.06723664384036332, "learning_rate": 0.00012245431168586535, "loss": 0.8401, "step": 125840 }, { "epoch": 2.2094840148176758, "grad_norm": 0.06132274617518927, "learning_rate": 0.00012244343273488302, "loss": 0.8335, "step": 125850 }, { "epoch": 2.2096595796976772, "grad_norm": 0.05644585427636818, "learning_rate": 0.00012243255351220822, "loss": 0.8486, "step": 125860 }, { "epoch": 2.2098351445776787, "grad_norm": 0.056816390465292375, "learning_rate": 0.0001224216740179789, "loss": 0.8482, "step": 125870 }, { "epoch": 2.21001070945768, "grad_norm": 0.05463519191612593, "learning_rate": 0.00012241079425233286, "loss": 0.846, "step": 125880 }, { "epoch": 2.2101862743376817, "grad_norm": 0.058992668618387, "learning_rate": 0.00012239991421540794, "loss": 0.8485, "step": 125890 }, { "epoch": 2.2103618392176827, "grad_norm": 0.053769683662493, "learning_rate": 0.00012238903390734203, "loss": 0.842, "step": 125900 }, { "epoch": 2.210537404097684, "grad_norm": 0.05335001081905554, "learning_rate": 0.00012237815332827296, "loss": 0.8488, "step": 125910 }, { "epoch": 2.2107129689776857, "grad_norm": 0.08882324544724791, "learning_rate": 0.00012236727247833857, "loss": 0.8506, "step": 125920 }, { "epoch": 2.210888533857687, "grad_norm": 0.058324125372274487, "learning_rate": 0.00012235639135767677, "loss": 0.8358, "step": 125930 }, { "epoch": 2.2110640987376886, "grad_norm": 0.05728458036400814, "learning_rate": 0.00012234550996642537, "loss": 0.8461, "step": 125940 }, { "epoch": 2.21123966361769, "grad_norm": 0.06869062245046836, "learning_rate": 0.00012233462830472225, "loss": 0.8455, "step": 125950 }, { "epoch": 2.211415228497691, "grad_norm": 0.049680874213661914, "learning_rate": 0.00012232374637270534, "loss": 0.8405, "step": 125960 }, { "epoch": 2.2115907933776926, "grad_norm": 0.053337751162601386, "learning_rate": 0.00012231286417051248, "loss": 0.8369, "step": 125970 }, { "epoch": 2.211766358257694, "grad_norm": 0.05537271130590268, "learning_rate": 0.00012230198169828154, "loss": 0.8458, "step": 125980 }, { "epoch": 2.2119419231376956, "grad_norm": 0.07133347002202642, "learning_rate": 0.00012229109895615041, "loss": 0.8564, "step": 125990 }, { "epoch": 2.212117488017697, "grad_norm": 0.061176276188035714, "learning_rate": 0.00012228021594425694, "loss": 0.8549, "step": 126000 }, { "epoch": 2.2122930528976985, "grad_norm": 0.07602315571020539, "learning_rate": 0.0001222693326627391, "loss": 0.8391, "step": 126010 }, { "epoch": 2.2124686177776995, "grad_norm": 0.06249039776680722, "learning_rate": 0.00012225844911173473, "loss": 0.8478, "step": 126020 }, { "epoch": 2.212644182657701, "grad_norm": 0.04655733895851769, "learning_rate": 0.0001222475652913817, "loss": 0.8422, "step": 126030 }, { "epoch": 2.2128197475377025, "grad_norm": 0.06552088603653033, "learning_rate": 0.000122236681201818, "loss": 0.8463, "step": 126040 }, { "epoch": 2.212995312417704, "grad_norm": 0.059192355323315765, "learning_rate": 0.00012222579684318142, "loss": 0.8401, "step": 126050 }, { "epoch": 2.2131708772977055, "grad_norm": 0.05694046352028415, "learning_rate": 0.00012221491221560998, "loss": 0.8411, "step": 126060 }, { "epoch": 2.213346442177707, "grad_norm": 0.05691325805312341, "learning_rate": 0.0001222040273192415, "loss": 0.8443, "step": 126070 }, { "epoch": 2.2135220070577084, "grad_norm": 0.058102169878842626, "learning_rate": 0.00012219314215421395, "loss": 0.8421, "step": 126080 }, { "epoch": 2.2136975719377094, "grad_norm": 0.04827077825591236, "learning_rate": 0.00012218225672066524, "loss": 0.8395, "step": 126090 }, { "epoch": 2.213873136817711, "grad_norm": 0.047618621170581056, "learning_rate": 0.00012217137101873323, "loss": 0.8444, "step": 126100 }, { "epoch": 2.2140487016977124, "grad_norm": 0.05334565107204412, "learning_rate": 0.00012216048504855596, "loss": 0.8435, "step": 126110 }, { "epoch": 2.214224266577714, "grad_norm": 0.04536185241880609, "learning_rate": 0.00012214959881027122, "loss": 0.8441, "step": 126120 }, { "epoch": 2.2143998314577154, "grad_norm": 0.05777115785077156, "learning_rate": 0.00012213871230401705, "loss": 0.8432, "step": 126130 }, { "epoch": 2.2145753963377164, "grad_norm": 0.06228377848543966, "learning_rate": 0.00012212782552993137, "loss": 0.8367, "step": 126140 }, { "epoch": 2.214750961217718, "grad_norm": 0.05075513739031121, "learning_rate": 0.000122116938488152, "loss": 0.8409, "step": 126150 }, { "epoch": 2.2149265260977193, "grad_norm": 0.05817278099598366, "learning_rate": 0.00012210605117881707, "loss": 0.8432, "step": 126160 }, { "epoch": 2.215102090977721, "grad_norm": 0.05295496694444222, "learning_rate": 0.00012209516360206437, "loss": 0.8451, "step": 126170 }, { "epoch": 2.2152776558577223, "grad_norm": 0.09323557715991254, "learning_rate": 0.00012208427575803192, "loss": 0.8418, "step": 126180 }, { "epoch": 2.2154532207377238, "grad_norm": 0.0537539429914354, "learning_rate": 0.00012207338764685767, "loss": 0.846, "step": 126190 }, { "epoch": 2.2156287856177252, "grad_norm": 0.06573737807092007, "learning_rate": 0.00012206249926867954, "loss": 0.8414, "step": 126200 }, { "epoch": 2.2158043504977263, "grad_norm": 0.07288256548536974, "learning_rate": 0.00012205161062363553, "loss": 0.8431, "step": 126210 }, { "epoch": 2.2159799153777278, "grad_norm": 0.07123957953719558, "learning_rate": 0.00012204072171186353, "loss": 0.8454, "step": 126220 }, { "epoch": 2.2161554802577292, "grad_norm": 0.07004823700841521, "learning_rate": 0.0001220298325335016, "loss": 0.8576, "step": 126230 }, { "epoch": 2.2163310451377307, "grad_norm": 0.05533944641770923, "learning_rate": 0.00012201894308868763, "loss": 0.8523, "step": 126240 }, { "epoch": 2.216506610017732, "grad_norm": 0.05325957913678593, "learning_rate": 0.00012200805337755964, "loss": 0.84, "step": 126250 }, { "epoch": 2.2166821748977332, "grad_norm": 0.062291746086394595, "learning_rate": 0.00012199716340025562, "loss": 0.8446, "step": 126260 }, { "epoch": 2.2168577397777347, "grad_norm": 0.052076041633373545, "learning_rate": 0.00012198627315691345, "loss": 0.8418, "step": 126270 }, { "epoch": 2.217033304657736, "grad_norm": 0.051228659134223635, "learning_rate": 0.00012197538264767124, "loss": 0.8447, "step": 126280 }, { "epoch": 2.2172088695377377, "grad_norm": 0.05090819212002249, "learning_rate": 0.00012196449187266685, "loss": 0.8437, "step": 126290 }, { "epoch": 2.217384434417739, "grad_norm": 0.0454109009190558, "learning_rate": 0.00012195360083203837, "loss": 0.848, "step": 126300 }, { "epoch": 2.2175599992977406, "grad_norm": 0.04859826158450606, "learning_rate": 0.00012194270952592377, "loss": 0.8468, "step": 126310 }, { "epoch": 2.217735564177742, "grad_norm": 0.04827383549640412, "learning_rate": 0.00012193181795446095, "loss": 0.8466, "step": 126320 }, { "epoch": 2.217911129057743, "grad_norm": 0.04461621830822064, "learning_rate": 0.00012192092611778807, "loss": 0.845, "step": 126330 }, { "epoch": 2.2180866939377446, "grad_norm": 0.05859748355994447, "learning_rate": 0.000121910034016043, "loss": 0.8467, "step": 126340 }, { "epoch": 2.218262258817746, "grad_norm": 0.059394069683367835, "learning_rate": 0.00012189914164936382, "loss": 0.8457, "step": 126350 }, { "epoch": 2.2184378236977476, "grad_norm": 0.07942198955600516, "learning_rate": 0.00012188824901788851, "loss": 0.8405, "step": 126360 }, { "epoch": 2.218613388577749, "grad_norm": 0.06395199360690917, "learning_rate": 0.00012187735612175504, "loss": 0.8478, "step": 126370 }, { "epoch": 2.2187889534577505, "grad_norm": 0.046814827531287906, "learning_rate": 0.00012186646296110149, "loss": 0.842, "step": 126380 }, { "epoch": 2.2189645183377515, "grad_norm": 0.0684136061567483, "learning_rate": 0.00012185556953606586, "loss": 0.8469, "step": 126390 }, { "epoch": 2.219140083217753, "grad_norm": 0.06758837407902178, "learning_rate": 0.00012184467584678616, "loss": 0.8473, "step": 126400 }, { "epoch": 2.2193156480977545, "grad_norm": 0.0828426595774835, "learning_rate": 0.00012183378189340042, "loss": 0.8412, "step": 126410 }, { "epoch": 2.219491212977756, "grad_norm": 0.06526645148277187, "learning_rate": 0.00012182288767604668, "loss": 0.8357, "step": 126420 }, { "epoch": 2.2196667778577575, "grad_norm": 0.049842220838381045, "learning_rate": 0.00012181199319486296, "loss": 0.8403, "step": 126430 }, { "epoch": 2.219842342737759, "grad_norm": 0.0539725460119618, "learning_rate": 0.00012180109844998728, "loss": 0.8424, "step": 126440 }, { "epoch": 2.22001790761776, "grad_norm": 0.061508797196667, "learning_rate": 0.00012179020344155773, "loss": 0.8403, "step": 126450 }, { "epoch": 2.2201934724977614, "grad_norm": 0.061168748295878456, "learning_rate": 0.0001217793081697123, "loss": 0.8435, "step": 126460 }, { "epoch": 2.220369037377763, "grad_norm": 0.06485893815468528, "learning_rate": 0.00012176841263458907, "loss": 0.8444, "step": 126470 }, { "epoch": 2.2205446022577644, "grad_norm": 0.05034276875293499, "learning_rate": 0.00012175751683632606, "loss": 0.8499, "step": 126480 }, { "epoch": 2.220720167137766, "grad_norm": 0.06548199859907763, "learning_rate": 0.00012174662077506129, "loss": 0.8369, "step": 126490 }, { "epoch": 2.2208957320177674, "grad_norm": 0.04593495378947425, "learning_rate": 0.00012173572445093293, "loss": 0.8456, "step": 126500 }, { "epoch": 2.2210712968977684, "grad_norm": 0.045744289489457905, "learning_rate": 0.0001217248278640789, "loss": 0.8413, "step": 126510 }, { "epoch": 2.22124686177777, "grad_norm": 0.05951027161702524, "learning_rate": 0.00012171393101463738, "loss": 0.8413, "step": 126520 }, { "epoch": 2.2214224266577713, "grad_norm": 0.06424175949058222, "learning_rate": 0.00012170303390274638, "loss": 0.8501, "step": 126530 }, { "epoch": 2.221597991537773, "grad_norm": 0.06002808848016759, "learning_rate": 0.00012169213652854396, "loss": 0.8471, "step": 126540 }, { "epoch": 2.2217735564177743, "grad_norm": 0.0505488578300717, "learning_rate": 0.0001216812388921682, "loss": 0.8424, "step": 126550 }, { "epoch": 2.2219491212977758, "grad_norm": 0.1429328371098002, "learning_rate": 0.00012167034099375718, "loss": 0.8528, "step": 126560 }, { "epoch": 2.222124686177777, "grad_norm": 0.047233463558937414, "learning_rate": 0.00012165944283344898, "loss": 0.8435, "step": 126570 }, { "epoch": 2.2223002510577783, "grad_norm": 0.06954293731363158, "learning_rate": 0.0001216485444113817, "loss": 0.8516, "step": 126580 }, { "epoch": 2.2224758159377798, "grad_norm": 0.054206520764593605, "learning_rate": 0.0001216376457276934, "loss": 0.8473, "step": 126590 }, { "epoch": 2.2226513808177812, "grad_norm": 0.04874093361036773, "learning_rate": 0.00012162674678252215, "loss": 0.8504, "step": 126600 }, { "epoch": 2.2228269456977827, "grad_norm": 0.05207421707499434, "learning_rate": 0.00012161584757600608, "loss": 0.8466, "step": 126610 }, { "epoch": 2.223002510577784, "grad_norm": 0.06338965954588952, "learning_rate": 0.00012160494810828327, "loss": 0.8442, "step": 126620 }, { "epoch": 2.2231780754577852, "grad_norm": 0.05591294857931998, "learning_rate": 0.0001215940483794918, "loss": 0.8462, "step": 126630 }, { "epoch": 2.2233536403377867, "grad_norm": 0.0621792012067224, "learning_rate": 0.00012158314838976981, "loss": 0.8447, "step": 126640 }, { "epoch": 2.223529205217788, "grad_norm": 0.05331296408453664, "learning_rate": 0.00012157224813925538, "loss": 0.8436, "step": 126650 }, { "epoch": 2.2237047700977897, "grad_norm": 0.05685037123672975, "learning_rate": 0.00012156134762808662, "loss": 0.8488, "step": 126660 }, { "epoch": 2.223880334977791, "grad_norm": 0.05537803300502391, "learning_rate": 0.00012155044685640167, "loss": 0.8457, "step": 126670 }, { "epoch": 2.2240558998577926, "grad_norm": 0.05079050571256507, "learning_rate": 0.0001215395458243386, "loss": 0.8416, "step": 126680 }, { "epoch": 2.2242314647377937, "grad_norm": 0.05715163435197782, "learning_rate": 0.00012152864453203554, "loss": 0.8521, "step": 126690 }, { "epoch": 2.224407029617795, "grad_norm": 0.08843125716107106, "learning_rate": 0.00012151774297963063, "loss": 0.8427, "step": 126700 }, { "epoch": 2.2245825944977966, "grad_norm": 0.07611433116226245, "learning_rate": 0.00012150684116726198, "loss": 0.8432, "step": 126710 }, { "epoch": 2.224758159377798, "grad_norm": 0.06354914418885407, "learning_rate": 0.00012149593909506772, "loss": 0.8398, "step": 126720 }, { "epoch": 2.2249337242577996, "grad_norm": 0.06861252538379717, "learning_rate": 0.000121485036763186, "loss": 0.8394, "step": 126730 }, { "epoch": 2.225109289137801, "grad_norm": 0.05338294286142157, "learning_rate": 0.0001214741341717549, "loss": 0.8455, "step": 126740 }, { "epoch": 2.225284854017802, "grad_norm": 0.05350894275836725, "learning_rate": 0.00012146323132091262, "loss": 0.8454, "step": 126750 }, { "epoch": 2.2254604188978035, "grad_norm": 0.0695870259562744, "learning_rate": 0.00012145232821079729, "loss": 0.8444, "step": 126760 }, { "epoch": 2.225635983777805, "grad_norm": 0.05358854761925729, "learning_rate": 0.00012144142484154704, "loss": 0.8414, "step": 126770 }, { "epoch": 2.2258115486578065, "grad_norm": 0.05469690436933419, "learning_rate": 0.00012143052121329999, "loss": 0.8463, "step": 126780 }, { "epoch": 2.225987113537808, "grad_norm": 0.058186260558330274, "learning_rate": 0.00012141961732619435, "loss": 0.8412, "step": 126790 }, { "epoch": 2.2261626784178095, "grad_norm": 0.053162088032489015, "learning_rate": 0.00012140871318036824, "loss": 0.8462, "step": 126800 }, { "epoch": 2.2263382432978105, "grad_norm": 0.06756330812642146, "learning_rate": 0.00012139780877595984, "loss": 0.8405, "step": 126810 }, { "epoch": 2.226513808177812, "grad_norm": 0.049634778961152815, "learning_rate": 0.00012138690411310725, "loss": 0.8487, "step": 126820 }, { "epoch": 2.2266893730578134, "grad_norm": 0.05190590211886879, "learning_rate": 0.00012137599919194869, "loss": 0.8507, "step": 126830 }, { "epoch": 2.226864937937815, "grad_norm": 0.06303888580385243, "learning_rate": 0.00012136509401262235, "loss": 0.8484, "step": 126840 }, { "epoch": 2.2270405028178164, "grad_norm": 0.08153305411877568, "learning_rate": 0.00012135418857526633, "loss": 0.8458, "step": 126850 }, { "epoch": 2.227216067697818, "grad_norm": 0.08049074541294166, "learning_rate": 0.00012134328288001885, "loss": 0.8372, "step": 126860 }, { "epoch": 2.2273916325778194, "grad_norm": 0.052023189048536346, "learning_rate": 0.0001213323769270181, "loss": 0.8413, "step": 126870 }, { "epoch": 2.2275671974578204, "grad_norm": 0.05475484043582049, "learning_rate": 0.0001213214707164022, "loss": 0.8384, "step": 126880 }, { "epoch": 2.227742762337822, "grad_norm": 0.0818886718792576, "learning_rate": 0.00012131056424830937, "loss": 0.8588, "step": 126890 }, { "epoch": 2.2279183272178233, "grad_norm": 0.06755558330779185, "learning_rate": 0.00012129965752287784, "loss": 0.8418, "step": 126900 }, { "epoch": 2.228093892097825, "grad_norm": 0.05026721468648753, "learning_rate": 0.00012128875054024574, "loss": 0.8518, "step": 126910 }, { "epoch": 2.2282694569778263, "grad_norm": 0.06254677141938834, "learning_rate": 0.00012127784330055125, "loss": 0.8382, "step": 126920 }, { "epoch": 2.2284450218578273, "grad_norm": 0.053669949354754685, "learning_rate": 0.00012126693580393262, "loss": 0.8476, "step": 126930 }, { "epoch": 2.228620586737829, "grad_norm": 0.05960322953139219, "learning_rate": 0.00012125602805052807, "loss": 0.8434, "step": 126940 }, { "epoch": 2.2287961516178303, "grad_norm": 0.05254167017081585, "learning_rate": 0.0001212451200404757, "loss": 0.8485, "step": 126950 }, { "epoch": 2.2289717164978318, "grad_norm": 0.05068568847728551, "learning_rate": 0.0001212342117739138, "loss": 0.8526, "step": 126960 }, { "epoch": 2.2291472813778332, "grad_norm": 0.06901731372316801, "learning_rate": 0.00012122330325098058, "loss": 0.8392, "step": 126970 }, { "epoch": 2.2293228462578347, "grad_norm": 0.04651029935805866, "learning_rate": 0.00012121239447181423, "loss": 0.8497, "step": 126980 }, { "epoch": 2.229498411137836, "grad_norm": 0.05203396572661705, "learning_rate": 0.00012120148543655297, "loss": 0.8451, "step": 126990 }, { "epoch": 2.2296739760178372, "grad_norm": 0.05957030913687146, "learning_rate": 0.00012119057614533498, "loss": 0.8486, "step": 127000 }, { "epoch": 2.2298495408978387, "grad_norm": 0.06426275082970383, "learning_rate": 0.00012117966659829856, "loss": 0.847, "step": 127010 }, { "epoch": 2.23002510577784, "grad_norm": 0.09740694752366445, "learning_rate": 0.00012116875679558187, "loss": 0.852, "step": 127020 }, { "epoch": 2.2302006706578417, "grad_norm": 0.06183828889707933, "learning_rate": 0.0001211578467373232, "loss": 0.8488, "step": 127030 }, { "epoch": 2.230376235537843, "grad_norm": 0.05043877802563578, "learning_rate": 0.00012114693642366072, "loss": 0.8366, "step": 127040 }, { "epoch": 2.2305518004178446, "grad_norm": 0.051978325806673685, "learning_rate": 0.00012113602585473268, "loss": 0.849, "step": 127050 }, { "epoch": 2.2307273652978457, "grad_norm": 0.04063493361903021, "learning_rate": 0.00012112511503067738, "loss": 0.8492, "step": 127060 }, { "epoch": 2.230902930177847, "grad_norm": 0.050096779605621866, "learning_rate": 0.00012111420395163297, "loss": 0.8567, "step": 127070 }, { "epoch": 2.2310784950578486, "grad_norm": 0.05017020258850516, "learning_rate": 0.00012110329261773776, "loss": 0.8528, "step": 127080 }, { "epoch": 2.23125405993785, "grad_norm": 0.050876975008473185, "learning_rate": 0.00012109238102912996, "loss": 0.8427, "step": 127090 }, { "epoch": 2.2314296248178516, "grad_norm": 0.057987899619577815, "learning_rate": 0.0001210814691859479, "loss": 0.8388, "step": 127100 }, { "epoch": 2.231605189697853, "grad_norm": 0.05043450875782372, "learning_rate": 0.00012107055708832975, "loss": 0.8417, "step": 127110 }, { "epoch": 2.231780754577854, "grad_norm": 0.052397492491604074, "learning_rate": 0.00012105964473641376, "loss": 0.845, "step": 127120 }, { "epoch": 2.2319563194578556, "grad_norm": 0.0642220400619057, "learning_rate": 0.00012104873213033824, "loss": 0.8527, "step": 127130 }, { "epoch": 2.232131884337857, "grad_norm": 0.053397180817605576, "learning_rate": 0.00012103781927024145, "loss": 0.8466, "step": 127140 }, { "epoch": 2.2323074492178585, "grad_norm": 0.09446753028256093, "learning_rate": 0.00012102690615626167, "loss": 0.8363, "step": 127150 }, { "epoch": 2.23248301409786, "grad_norm": 0.14015779149747495, "learning_rate": 0.00012101599278853712, "loss": 0.8509, "step": 127160 }, { "epoch": 2.2326585789778615, "grad_norm": 0.05699227329853976, "learning_rate": 0.00012100507916720614, "loss": 0.8429, "step": 127170 }, { "epoch": 2.2328341438578625, "grad_norm": 0.05057897529303181, "learning_rate": 0.00012099416529240695, "loss": 0.8457, "step": 127180 }, { "epoch": 2.233009708737864, "grad_norm": 0.051850279455429846, "learning_rate": 0.00012098325116427784, "loss": 0.8504, "step": 127190 }, { "epoch": 2.2331852736178655, "grad_norm": 0.048130844961995774, "learning_rate": 0.00012097233678295712, "loss": 0.8382, "step": 127200 }, { "epoch": 2.233360838497867, "grad_norm": 0.05786962030938025, "learning_rate": 0.00012096142214858308, "loss": 0.8461, "step": 127210 }, { "epoch": 2.2335364033778684, "grad_norm": 0.05219772283483234, "learning_rate": 0.000120950507261294, "loss": 0.8517, "step": 127220 }, { "epoch": 2.23371196825787, "grad_norm": 0.05125757420415853, "learning_rate": 0.00012093959212122814, "loss": 0.8454, "step": 127230 }, { "epoch": 2.233887533137871, "grad_norm": 0.06337737455899584, "learning_rate": 0.00012092867672852384, "loss": 0.8431, "step": 127240 }, { "epoch": 2.2340630980178724, "grad_norm": 0.06240386819017549, "learning_rate": 0.00012091776108331938, "loss": 0.848, "step": 127250 }, { "epoch": 2.234238662897874, "grad_norm": 0.04820925002540306, "learning_rate": 0.00012090684518575309, "loss": 0.8388, "step": 127260 }, { "epoch": 2.2344142277778754, "grad_norm": 0.07092715891410631, "learning_rate": 0.00012089592903596323, "loss": 0.8429, "step": 127270 }, { "epoch": 2.234589792657877, "grad_norm": 0.04851370776953453, "learning_rate": 0.00012088501263408816, "loss": 0.8411, "step": 127280 }, { "epoch": 2.2347653575378783, "grad_norm": 0.056370651516150026, "learning_rate": 0.00012087409598026616, "loss": 0.8445, "step": 127290 }, { "epoch": 2.2349409224178793, "grad_norm": 0.055406029301865944, "learning_rate": 0.00012086317907463556, "loss": 0.8466, "step": 127300 }, { "epoch": 2.235116487297881, "grad_norm": 0.06741774078954495, "learning_rate": 0.00012085226191733465, "loss": 0.8423, "step": 127310 }, { "epoch": 2.2352920521778823, "grad_norm": 0.049896059664963244, "learning_rate": 0.00012084134450850182, "loss": 0.8478, "step": 127320 }, { "epoch": 2.2354676170578838, "grad_norm": 0.057817205338778016, "learning_rate": 0.00012083042684827532, "loss": 0.84, "step": 127330 }, { "epoch": 2.2356431819378852, "grad_norm": 0.07149054434998792, "learning_rate": 0.0001208195089367935, "loss": 0.8464, "step": 127340 }, { "epoch": 2.2358187468178867, "grad_norm": 0.06225726158944956, "learning_rate": 0.00012080859077419473, "loss": 0.849, "step": 127350 }, { "epoch": 2.2359943116978878, "grad_norm": 0.05003494012216922, "learning_rate": 0.0001207976723606173, "loss": 0.8448, "step": 127360 }, { "epoch": 2.2361698765778892, "grad_norm": 0.05616719051380266, "learning_rate": 0.00012078675369619959, "loss": 0.8468, "step": 127370 }, { "epoch": 2.2363454414578907, "grad_norm": 0.05386309344251496, "learning_rate": 0.00012077583478107992, "loss": 0.8394, "step": 127380 }, { "epoch": 2.236521006337892, "grad_norm": 0.05572780073858405, "learning_rate": 0.0001207649156153966, "loss": 0.8513, "step": 127390 }, { "epoch": 2.2366965712178937, "grad_norm": 0.07718166836074371, "learning_rate": 0.00012075399619928805, "loss": 0.8373, "step": 127400 }, { "epoch": 2.236872136097895, "grad_norm": 0.06672499207061582, "learning_rate": 0.00012074307653289254, "loss": 0.8376, "step": 127410 }, { "epoch": 2.237047700977896, "grad_norm": 0.07108159699886057, "learning_rate": 0.00012073215661634851, "loss": 0.852, "step": 127420 }, { "epoch": 2.2372232658578977, "grad_norm": 0.04008863013393428, "learning_rate": 0.00012072123644979423, "loss": 0.8383, "step": 127430 }, { "epoch": 2.237398830737899, "grad_norm": 0.048878392621651764, "learning_rate": 0.00012071031603336814, "loss": 0.853, "step": 127440 }, { "epoch": 2.2375743956179006, "grad_norm": 0.040562453410576675, "learning_rate": 0.00012069939536720858, "loss": 0.8438, "step": 127450 }, { "epoch": 2.237749960497902, "grad_norm": 0.06841936846319334, "learning_rate": 0.00012068847445145385, "loss": 0.8547, "step": 127460 }, { "epoch": 2.2379255253779036, "grad_norm": 0.062043539179198776, "learning_rate": 0.00012067755328624242, "loss": 0.8483, "step": 127470 }, { "epoch": 2.2381010902579046, "grad_norm": 0.0515222887281822, "learning_rate": 0.00012066663187171261, "loss": 0.8438, "step": 127480 }, { "epoch": 2.238276655137906, "grad_norm": 0.048078104386411954, "learning_rate": 0.00012065571020800279, "loss": 0.8398, "step": 127490 }, { "epoch": 2.2384522200179076, "grad_norm": 0.06331724834574518, "learning_rate": 0.00012064478829525139, "loss": 0.8406, "step": 127500 }, { "epoch": 2.238627784897909, "grad_norm": 0.05408857775549483, "learning_rate": 0.00012063386613359671, "loss": 0.8513, "step": 127510 }, { "epoch": 2.2388033497779105, "grad_norm": 0.05420122880186484, "learning_rate": 0.00012062294372317721, "loss": 0.8446, "step": 127520 }, { "epoch": 2.238978914657912, "grad_norm": 0.04161104660398838, "learning_rate": 0.00012061202106413125, "loss": 0.8432, "step": 127530 }, { "epoch": 2.2391544795379135, "grad_norm": 0.04592771082959264, "learning_rate": 0.00012060109815659721, "loss": 0.8487, "step": 127540 }, { "epoch": 2.2393300444179145, "grad_norm": 0.04161974344010487, "learning_rate": 0.00012059017500071354, "loss": 0.8456, "step": 127550 }, { "epoch": 2.239505609297916, "grad_norm": 0.0549208471819303, "learning_rate": 0.00012057925159661856, "loss": 0.847, "step": 127560 }, { "epoch": 2.2396811741779175, "grad_norm": 0.061230256495136845, "learning_rate": 0.00012056832794445072, "loss": 0.8396, "step": 127570 }, { "epoch": 2.239856739057919, "grad_norm": 0.050607266488797556, "learning_rate": 0.00012055740404434841, "loss": 0.8414, "step": 127580 }, { "epoch": 2.2400323039379204, "grad_norm": 0.07557476795461483, "learning_rate": 0.00012054647989645007, "loss": 0.8511, "step": 127590 }, { "epoch": 2.2402078688179214, "grad_norm": 0.04487835281780756, "learning_rate": 0.00012053555550089403, "loss": 0.8359, "step": 127600 }, { "epoch": 2.240383433697923, "grad_norm": 0.06741257746376572, "learning_rate": 0.0001205246308578188, "loss": 0.8493, "step": 127610 }, { "epoch": 2.2405589985779244, "grad_norm": 0.050863118017829244, "learning_rate": 0.00012051370596736275, "loss": 0.8433, "step": 127620 }, { "epoch": 2.240734563457926, "grad_norm": 0.06005233088979729, "learning_rate": 0.00012050278082966429, "loss": 0.8418, "step": 127630 }, { "epoch": 2.2409101283379274, "grad_norm": 0.06577786908152802, "learning_rate": 0.00012049185544486187, "loss": 0.8494, "step": 127640 }, { "epoch": 2.241085693217929, "grad_norm": 0.058503192094923794, "learning_rate": 0.00012048092981309391, "loss": 0.8441, "step": 127650 }, { "epoch": 2.2412612580979303, "grad_norm": 0.0590534186115401, "learning_rate": 0.00012047000393449883, "loss": 0.8491, "step": 127660 }, { "epoch": 2.2414368229779313, "grad_norm": 0.07115154207453026, "learning_rate": 0.0001204590778092151, "loss": 0.8438, "step": 127670 }, { "epoch": 2.241612387857933, "grad_norm": 0.08093564099255758, "learning_rate": 0.00012044815143738106, "loss": 0.8474, "step": 127680 }, { "epoch": 2.2417879527379343, "grad_norm": 0.051001426231876555, "learning_rate": 0.00012043722481913526, "loss": 0.8449, "step": 127690 }, { "epoch": 2.2419635176179358, "grad_norm": 0.04811234529075256, "learning_rate": 0.00012042629795461609, "loss": 0.8419, "step": 127700 }, { "epoch": 2.2421390824979373, "grad_norm": 0.05945939353237796, "learning_rate": 0.00012041537084396199, "loss": 0.8469, "step": 127710 }, { "epoch": 2.2423146473779383, "grad_norm": 0.050002411317756686, "learning_rate": 0.00012040444348731146, "loss": 0.8451, "step": 127720 }, { "epoch": 2.2424902122579398, "grad_norm": 0.08294293376518933, "learning_rate": 0.00012039351588480285, "loss": 0.8521, "step": 127730 }, { "epoch": 2.2426657771379412, "grad_norm": 0.05417032205793188, "learning_rate": 0.00012038258803657472, "loss": 0.8523, "step": 127740 }, { "epoch": 2.2428413420179427, "grad_norm": 0.062023417435839764, "learning_rate": 0.00012037165994276545, "loss": 0.8514, "step": 127750 }, { "epoch": 2.243016906897944, "grad_norm": 0.05556749597666896, "learning_rate": 0.00012036073160351358, "loss": 0.8491, "step": 127760 }, { "epoch": 2.2431924717779457, "grad_norm": 0.06576451850135229, "learning_rate": 0.00012034980301895749, "loss": 0.8407, "step": 127770 }, { "epoch": 2.243368036657947, "grad_norm": 0.06110549067518934, "learning_rate": 0.0001203388741892357, "loss": 0.8457, "step": 127780 }, { "epoch": 2.243543601537948, "grad_norm": 0.05854086673256772, "learning_rate": 0.00012032794511448669, "loss": 0.8463, "step": 127790 }, { "epoch": 2.2437191664179497, "grad_norm": 0.0452600777635768, "learning_rate": 0.0001203170157948489, "loss": 0.8487, "step": 127800 }, { "epoch": 2.243894731297951, "grad_norm": 0.06230528180066106, "learning_rate": 0.0001203060862304608, "loss": 0.8497, "step": 127810 }, { "epoch": 2.2440702961779526, "grad_norm": 0.050887777695409635, "learning_rate": 0.0001202951564214609, "loss": 0.8458, "step": 127820 }, { "epoch": 2.244245861057954, "grad_norm": 0.05131143702692972, "learning_rate": 0.00012028422636798766, "loss": 0.8441, "step": 127830 }, { "epoch": 2.2444214259379556, "grad_norm": 0.04679862232566087, "learning_rate": 0.00012027329607017961, "loss": 0.8511, "step": 127840 }, { "epoch": 2.2445969908179566, "grad_norm": 0.07384193750348465, "learning_rate": 0.00012026236552817516, "loss": 0.846, "step": 127850 }, { "epoch": 2.244772555697958, "grad_norm": 0.04834016687838362, "learning_rate": 0.00012025143474211289, "loss": 0.8433, "step": 127860 }, { "epoch": 2.2449481205779596, "grad_norm": 0.04618925652587663, "learning_rate": 0.0001202405037121312, "loss": 0.8449, "step": 127870 }, { "epoch": 2.245123685457961, "grad_norm": 0.05392338022026461, "learning_rate": 0.0001202295724383687, "loss": 0.8494, "step": 127880 }, { "epoch": 2.2452992503379625, "grad_norm": 0.05367631157373896, "learning_rate": 0.0001202186409209638, "loss": 0.8412, "step": 127890 }, { "epoch": 2.245474815217964, "grad_norm": 0.054883681539805704, "learning_rate": 0.00012020770916005504, "loss": 0.8356, "step": 127900 }, { "epoch": 2.245650380097965, "grad_norm": 0.06484077660874309, "learning_rate": 0.0001201967771557809, "loss": 0.8381, "step": 127910 }, { "epoch": 2.2458259449779665, "grad_norm": 0.07652584890937958, "learning_rate": 0.00012018584490827995, "loss": 0.844, "step": 127920 }, { "epoch": 2.246001509857968, "grad_norm": 0.050771552571713674, "learning_rate": 0.00012017491241769066, "loss": 0.8476, "step": 127930 }, { "epoch": 2.2461770747379695, "grad_norm": 0.06577278789734202, "learning_rate": 0.00012016397968415153, "loss": 0.8425, "step": 127940 }, { "epoch": 2.246352639617971, "grad_norm": 0.05219347956392632, "learning_rate": 0.00012015304670780115, "loss": 0.848, "step": 127950 }, { "epoch": 2.2465282044979724, "grad_norm": 0.047634636200193474, "learning_rate": 0.00012014211348877798, "loss": 0.8428, "step": 127960 }, { "epoch": 2.2467037693779734, "grad_norm": 0.04196046724706411, "learning_rate": 0.00012013118002722054, "loss": 0.8498, "step": 127970 }, { "epoch": 2.246879334257975, "grad_norm": 0.05014883572543192, "learning_rate": 0.00012012024632326742, "loss": 0.8536, "step": 127980 }, { "epoch": 2.2470548991379764, "grad_norm": 0.057760193843324086, "learning_rate": 0.00012010931237705707, "loss": 0.8393, "step": 127990 }, { "epoch": 2.247230464017978, "grad_norm": 0.06211715323321185, "learning_rate": 0.00012009837818872808, "loss": 0.8462, "step": 128000 }, { "epoch": 2.2474060288979794, "grad_norm": 0.048407856334641335, "learning_rate": 0.00012008744375841903, "loss": 0.8526, "step": 128010 }, { "epoch": 2.247581593777981, "grad_norm": 0.05652547433648526, "learning_rate": 0.00012007650908626835, "loss": 0.8483, "step": 128020 }, { "epoch": 2.247757158657982, "grad_norm": 0.04483441715001763, "learning_rate": 0.00012006557417241469, "loss": 0.844, "step": 128030 }, { "epoch": 2.2479327235379833, "grad_norm": 0.05064505364581861, "learning_rate": 0.00012005463901699649, "loss": 0.8403, "step": 128040 }, { "epoch": 2.248108288417985, "grad_norm": 0.05509192719538145, "learning_rate": 0.00012004370362015242, "loss": 0.8407, "step": 128050 }, { "epoch": 2.2482838532979863, "grad_norm": 0.055436586314752505, "learning_rate": 0.00012003276798202095, "loss": 0.842, "step": 128060 }, { "epoch": 2.248459418177988, "grad_norm": 0.06358946168356959, "learning_rate": 0.00012002183210274066, "loss": 0.8554, "step": 128070 }, { "epoch": 2.2486349830579893, "grad_norm": 0.07174397724627936, "learning_rate": 0.00012001089598245012, "loss": 0.8387, "step": 128080 }, { "epoch": 2.2488105479379903, "grad_norm": 0.09524476515321627, "learning_rate": 0.0001199999596212879, "loss": 0.858, "step": 128090 }, { "epoch": 2.2489861128179918, "grad_norm": 0.04632378523523165, "learning_rate": 0.00011998902301939252, "loss": 0.8414, "step": 128100 }, { "epoch": 2.2491616776979932, "grad_norm": 0.04873433943446527, "learning_rate": 0.00011997808617690258, "loss": 0.8416, "step": 128110 }, { "epoch": 2.2493372425779947, "grad_norm": 0.054624505429070616, "learning_rate": 0.00011996714909395665, "loss": 0.8423, "step": 128120 }, { "epoch": 2.249512807457996, "grad_norm": 0.0566616030348163, "learning_rate": 0.00011995621177069334, "loss": 0.8391, "step": 128130 }, { "epoch": 2.2496883723379977, "grad_norm": 0.04297834493865435, "learning_rate": 0.00011994527420725116, "loss": 0.8421, "step": 128140 }, { "epoch": 2.2498639372179987, "grad_norm": 0.06599351450731214, "learning_rate": 0.00011993433640376877, "loss": 0.8452, "step": 128150 }, { "epoch": 2.250039502098, "grad_norm": 0.0711317104756666, "learning_rate": 0.00011992339836038465, "loss": 0.8458, "step": 128160 }, { "epoch": 2.2502150669780017, "grad_norm": 0.06529629568761311, "learning_rate": 0.00011991246007723747, "loss": 0.8406, "step": 128170 }, { "epoch": 2.250390631858003, "grad_norm": 0.05008751918166789, "learning_rate": 0.00011990152155446581, "loss": 0.8446, "step": 128180 }, { "epoch": 2.2505661967380046, "grad_norm": 0.047516861812579206, "learning_rate": 0.00011989058279220824, "loss": 0.8427, "step": 128190 }, { "epoch": 2.250741761618006, "grad_norm": 0.06785645912376191, "learning_rate": 0.00011987964379060337, "loss": 0.8379, "step": 128200 }, { "epoch": 2.2509173264980076, "grad_norm": 0.051872200204142925, "learning_rate": 0.00011986870454978978, "loss": 0.849, "step": 128210 }, { "epoch": 2.2510928913780086, "grad_norm": 0.06372857168697321, "learning_rate": 0.0001198577650699061, "loss": 0.8487, "step": 128220 }, { "epoch": 2.25126845625801, "grad_norm": 0.06796493771244702, "learning_rate": 0.0001198468253510909, "loss": 0.8497, "step": 128230 }, { "epoch": 2.2514440211380116, "grad_norm": 0.05471695906352941, "learning_rate": 0.00011983588539348282, "loss": 0.8486, "step": 128240 }, { "epoch": 2.251619586018013, "grad_norm": 0.050153442746337144, "learning_rate": 0.0001198249451972205, "loss": 0.8434, "step": 128250 }, { "epoch": 2.2517951508980145, "grad_norm": 0.06623710628659894, "learning_rate": 0.00011981400476244246, "loss": 0.8444, "step": 128260 }, { "epoch": 2.2519707157780156, "grad_norm": 0.061799800584016844, "learning_rate": 0.00011980306408928743, "loss": 0.8466, "step": 128270 }, { "epoch": 2.252146280658017, "grad_norm": 0.06187129068418657, "learning_rate": 0.00011979212317789392, "loss": 0.8448, "step": 128280 }, { "epoch": 2.2523218455380185, "grad_norm": 0.05838236552609495, "learning_rate": 0.00011978118202840062, "loss": 0.8444, "step": 128290 }, { "epoch": 2.25249741041802, "grad_norm": 0.10139481726466139, "learning_rate": 0.0001197702406409462, "loss": 0.8489, "step": 128300 }, { "epoch": 2.2526729752980215, "grad_norm": 0.05767646991297251, "learning_rate": 0.00011975929901566918, "loss": 0.8381, "step": 128310 }, { "epoch": 2.252848540178023, "grad_norm": 0.04663109369127097, "learning_rate": 0.00011974835715270825, "loss": 0.8496, "step": 128320 }, { "epoch": 2.2530241050580244, "grad_norm": 0.06738488341629936, "learning_rate": 0.00011973741505220203, "loss": 0.8453, "step": 128330 }, { "epoch": 2.2531996699380255, "grad_norm": 0.05587725114651585, "learning_rate": 0.0001197264727142892, "loss": 0.8469, "step": 128340 }, { "epoch": 2.253375234818027, "grad_norm": 0.061583466364615536, "learning_rate": 0.0001197155301391084, "loss": 0.8555, "step": 128350 }, { "epoch": 2.2535507996980284, "grad_norm": 0.05737171752425201, "learning_rate": 0.00011970458732679819, "loss": 0.8515, "step": 128360 }, { "epoch": 2.25372636457803, "grad_norm": 0.07621597394769182, "learning_rate": 0.00011969364427749727, "loss": 0.8389, "step": 128370 }, { "epoch": 2.2539019294580314, "grad_norm": 0.05819792472221031, "learning_rate": 0.00011968270099134433, "loss": 0.8361, "step": 128380 }, { "epoch": 2.2540774943380324, "grad_norm": 0.07056257788937743, "learning_rate": 0.00011967175746847797, "loss": 0.8478, "step": 128390 }, { "epoch": 2.254253059218034, "grad_norm": 0.04515653624560417, "learning_rate": 0.00011966081370903688, "loss": 0.8517, "step": 128400 }, { "epoch": 2.2544286240980353, "grad_norm": 0.0649929536184501, "learning_rate": 0.00011964986971315968, "loss": 0.8448, "step": 128410 }, { "epoch": 2.254604188978037, "grad_norm": 0.04489463433818073, "learning_rate": 0.0001196389254809851, "loss": 0.8472, "step": 128420 }, { "epoch": 2.2547797538580383, "grad_norm": 0.059994200173014475, "learning_rate": 0.00011962798101265171, "loss": 0.8543, "step": 128430 }, { "epoch": 2.25495531873804, "grad_norm": 0.0475703998049247, "learning_rate": 0.00011961703630829826, "loss": 0.8427, "step": 128440 }, { "epoch": 2.2551308836180413, "grad_norm": 0.05790113153554155, "learning_rate": 0.00011960609136806338, "loss": 0.8448, "step": 128450 }, { "epoch": 2.2553064484980423, "grad_norm": 0.05757026380798913, "learning_rate": 0.00011959514619208576, "loss": 0.854, "step": 128460 }, { "epoch": 2.2554820133780438, "grad_norm": 0.06405550238337962, "learning_rate": 0.00011958420078050408, "loss": 0.8467, "step": 128470 }, { "epoch": 2.2556575782580452, "grad_norm": 0.051013608384510106, "learning_rate": 0.000119573255133457, "loss": 0.8366, "step": 128480 }, { "epoch": 2.2558331431380467, "grad_norm": 0.07210407954662754, "learning_rate": 0.00011956230925108323, "loss": 0.8509, "step": 128490 }, { "epoch": 2.256008708018048, "grad_norm": 0.09117962769974118, "learning_rate": 0.00011955136313352142, "loss": 0.8378, "step": 128500 }, { "epoch": 2.2561842728980492, "grad_norm": 0.05171681448831035, "learning_rate": 0.00011954041678091032, "loss": 0.8535, "step": 128510 }, { "epoch": 2.2563598377780507, "grad_norm": 0.12385349977343448, "learning_rate": 0.0001195294701933886, "loss": 0.8561, "step": 128520 }, { "epoch": 2.256535402658052, "grad_norm": 0.051481911535265996, "learning_rate": 0.0001195185233710949, "loss": 0.8509, "step": 128530 }, { "epoch": 2.2567109675380537, "grad_norm": 0.05677915412963972, "learning_rate": 0.00011950757631416796, "loss": 0.843, "step": 128540 }, { "epoch": 2.256886532418055, "grad_norm": 0.047572117502051, "learning_rate": 0.0001194966290227465, "loss": 0.8483, "step": 128550 }, { "epoch": 2.2570620972980566, "grad_norm": 0.047085237980671324, "learning_rate": 0.0001194856814969692, "loss": 0.8483, "step": 128560 }, { "epoch": 2.257237662178058, "grad_norm": 0.04338139919355072, "learning_rate": 0.00011947473373697477, "loss": 0.837, "step": 128570 }, { "epoch": 2.257413227058059, "grad_norm": 0.041388391937648684, "learning_rate": 0.00011946378574290191, "loss": 0.845, "step": 128580 }, { "epoch": 2.2575887919380606, "grad_norm": 0.04563434591388101, "learning_rate": 0.00011945283751488938, "loss": 0.848, "step": 128590 }, { "epoch": 2.257764356818062, "grad_norm": 0.062396487350686425, "learning_rate": 0.00011944188905307583, "loss": 0.8452, "step": 128600 }, { "epoch": 2.2579399216980636, "grad_norm": 0.0588803562004438, "learning_rate": 0.00011943094035760003, "loss": 0.8402, "step": 128610 }, { "epoch": 2.258115486578065, "grad_norm": 0.066373537622859, "learning_rate": 0.00011941999142860065, "loss": 0.8443, "step": 128620 }, { "epoch": 2.258291051458066, "grad_norm": 0.06280810540122657, "learning_rate": 0.0001194090422662165, "loss": 0.8453, "step": 128630 }, { "epoch": 2.2584666163380676, "grad_norm": 0.06676546185459425, "learning_rate": 0.00011939809287058626, "loss": 0.8439, "step": 128640 }, { "epoch": 2.258642181218069, "grad_norm": 0.060672489982477006, "learning_rate": 0.00011938714324184859, "loss": 0.8401, "step": 128650 }, { "epoch": 2.2588177460980705, "grad_norm": 0.05056339385951393, "learning_rate": 0.00011937619338014236, "loss": 0.8421, "step": 128660 }, { "epoch": 2.258993310978072, "grad_norm": 0.055790992082679666, "learning_rate": 0.00011936524328560617, "loss": 0.8415, "step": 128670 }, { "epoch": 2.2591688758580735, "grad_norm": 0.09056493606918961, "learning_rate": 0.00011935429295837888, "loss": 0.845, "step": 128680 }, { "epoch": 2.259344440738075, "grad_norm": 0.069733168422254, "learning_rate": 0.00011934334239859916, "loss": 0.846, "step": 128690 }, { "epoch": 2.259520005618076, "grad_norm": 0.040228691703988566, "learning_rate": 0.00011933239160640582, "loss": 0.8452, "step": 128700 }, { "epoch": 2.2596955704980775, "grad_norm": 0.06961700494381752, "learning_rate": 0.00011932144058193753, "loss": 0.8445, "step": 128710 }, { "epoch": 2.259871135378079, "grad_norm": 0.05596952710503413, "learning_rate": 0.00011931048932533304, "loss": 0.8565, "step": 128720 }, { "epoch": 2.2600467002580804, "grad_norm": 0.04689436572418216, "learning_rate": 0.0001192995378367312, "loss": 0.8483, "step": 128730 }, { "epoch": 2.260222265138082, "grad_norm": 0.054401472174705555, "learning_rate": 0.0001192885861162707, "loss": 0.8414, "step": 128740 }, { "epoch": 2.2603978300180834, "grad_norm": 0.07803545844379477, "learning_rate": 0.00011927763416409028, "loss": 0.8444, "step": 128750 }, { "epoch": 2.2605733948980844, "grad_norm": 0.06192113681842398, "learning_rate": 0.00011926668198032876, "loss": 0.8443, "step": 128760 }, { "epoch": 2.260748959778086, "grad_norm": 0.057075963234159255, "learning_rate": 0.00011925572956512485, "loss": 0.8424, "step": 128770 }, { "epoch": 2.2609245246580874, "grad_norm": 0.06687259583707748, "learning_rate": 0.00011924477691861736, "loss": 0.8438, "step": 128780 }, { "epoch": 2.261100089538089, "grad_norm": 0.08156946047359688, "learning_rate": 0.00011923382404094505, "loss": 0.8515, "step": 128790 }, { "epoch": 2.2612756544180903, "grad_norm": 0.06636762341431478, "learning_rate": 0.00011922287093224668, "loss": 0.858, "step": 128800 }, { "epoch": 2.261451219298092, "grad_norm": 0.0675332470849134, "learning_rate": 0.00011921191759266108, "loss": 0.8423, "step": 128810 }, { "epoch": 2.261626784178093, "grad_norm": 0.06689572991419672, "learning_rate": 0.00011920096402232697, "loss": 0.8405, "step": 128820 }, { "epoch": 2.2618023490580943, "grad_norm": 0.05223008113191984, "learning_rate": 0.00011919001022138319, "loss": 0.8517, "step": 128830 }, { "epoch": 2.2619779139380958, "grad_norm": 0.05488393026727925, "learning_rate": 0.00011917905618996843, "loss": 0.84, "step": 128840 }, { "epoch": 2.2621534788180973, "grad_norm": 0.051427643869491994, "learning_rate": 0.00011916810192822157, "loss": 0.8522, "step": 128850 }, { "epoch": 2.2623290436980987, "grad_norm": 0.051293824899979586, "learning_rate": 0.00011915714743628142, "loss": 0.854, "step": 128860 }, { "epoch": 2.2625046085781, "grad_norm": 0.052748337123286644, "learning_rate": 0.00011914619271428667, "loss": 0.8505, "step": 128870 }, { "epoch": 2.2626801734581017, "grad_norm": 0.056719268216248814, "learning_rate": 0.00011913523776237622, "loss": 0.8468, "step": 128880 }, { "epoch": 2.2628557383381027, "grad_norm": 0.04965753379087251, "learning_rate": 0.0001191242825806888, "loss": 0.855, "step": 128890 }, { "epoch": 2.263031303218104, "grad_norm": 0.053292097875048745, "learning_rate": 0.00011911332716936329, "loss": 0.8392, "step": 128900 }, { "epoch": 2.2632068680981057, "grad_norm": 0.050790342624577836, "learning_rate": 0.00011910237152853843, "loss": 0.8465, "step": 128910 }, { "epoch": 2.263382432978107, "grad_norm": 0.0521877062923961, "learning_rate": 0.00011909141565835305, "loss": 0.845, "step": 128920 }, { "epoch": 2.2635579978581086, "grad_norm": 0.06382062226312961, "learning_rate": 0.00011908045955894598, "loss": 0.8488, "step": 128930 }, { "epoch": 2.2637335627381097, "grad_norm": 0.05340611316476193, "learning_rate": 0.000119069503230456, "loss": 0.8398, "step": 128940 }, { "epoch": 2.263909127618111, "grad_norm": 0.06559357088304377, "learning_rate": 0.00011905854667302197, "loss": 0.8448, "step": 128950 }, { "epoch": 2.2640846924981126, "grad_norm": 0.06479807681978746, "learning_rate": 0.00011904758988678269, "loss": 0.8499, "step": 128960 }, { "epoch": 2.264260257378114, "grad_norm": 0.04393978046740033, "learning_rate": 0.00011903663287187699, "loss": 0.8402, "step": 128970 }, { "epoch": 2.2644358222581156, "grad_norm": 0.06086719383965293, "learning_rate": 0.00011902567562844369, "loss": 0.8396, "step": 128980 }, { "epoch": 2.264611387138117, "grad_norm": 0.04981113300432262, "learning_rate": 0.00011901471815662162, "loss": 0.8393, "step": 128990 }, { "epoch": 2.2647869520181185, "grad_norm": 0.05122712513331021, "learning_rate": 0.00011900376045654965, "loss": 0.845, "step": 129000 }, { "epoch": 2.2649625168981196, "grad_norm": 0.04311122617790364, "learning_rate": 0.0001189928025283665, "loss": 0.8413, "step": 129010 }, { "epoch": 2.265138081778121, "grad_norm": 0.04999881742061369, "learning_rate": 0.00011898184437221117, "loss": 0.8438, "step": 129020 }, { "epoch": 2.2653136466581225, "grad_norm": 0.06288071637446098, "learning_rate": 0.00011897088598822241, "loss": 0.8464, "step": 129030 }, { "epoch": 2.265489211538124, "grad_norm": 0.04995700196822464, "learning_rate": 0.00011895992737653907, "loss": 0.8433, "step": 129040 }, { "epoch": 2.2656647764181255, "grad_norm": 0.045328697418117556, "learning_rate": 0.00011894896853730002, "loss": 0.8547, "step": 129050 }, { "epoch": 2.2658403412981265, "grad_norm": 0.052531732684747534, "learning_rate": 0.00011893800947064411, "loss": 0.8498, "step": 129060 }, { "epoch": 2.266015906178128, "grad_norm": 0.06645497773617823, "learning_rate": 0.00011892705017671018, "loss": 0.8411, "step": 129070 }, { "epoch": 2.2661914710581295, "grad_norm": 0.05732260616410579, "learning_rate": 0.00011891609065563707, "loss": 0.845, "step": 129080 }, { "epoch": 2.266367035938131, "grad_norm": 0.046365735790160595, "learning_rate": 0.00011890513090756364, "loss": 0.8475, "step": 129090 }, { "epoch": 2.2665426008181324, "grad_norm": 0.04604328642341464, "learning_rate": 0.0001188941709326288, "loss": 0.8456, "step": 129100 }, { "epoch": 2.266718165698134, "grad_norm": 0.06850647325222535, "learning_rate": 0.00011888321073097135, "loss": 0.8413, "step": 129110 }, { "epoch": 2.2668937305781354, "grad_norm": 0.07088630697786258, "learning_rate": 0.00011887225030273024, "loss": 0.8433, "step": 129120 }, { "epoch": 2.2670692954581364, "grad_norm": 0.059783499091725985, "learning_rate": 0.00011886128964804426, "loss": 0.8495, "step": 129130 }, { "epoch": 2.267244860338138, "grad_norm": 0.0524798732890788, "learning_rate": 0.00011885032876705233, "loss": 0.8423, "step": 129140 }, { "epoch": 2.2674204252181394, "grad_norm": 0.05453577132980929, "learning_rate": 0.00011883936765989334, "loss": 0.8495, "step": 129150 }, { "epoch": 2.267595990098141, "grad_norm": 0.06481405471189107, "learning_rate": 0.00011882840632670611, "loss": 0.847, "step": 129160 }, { "epoch": 2.2677715549781423, "grad_norm": 0.0618537446003354, "learning_rate": 0.00011881744476762957, "loss": 0.8457, "step": 129170 }, { "epoch": 2.2679471198581433, "grad_norm": 0.050764107989486544, "learning_rate": 0.00011880648298280254, "loss": 0.8446, "step": 129180 }, { "epoch": 2.268122684738145, "grad_norm": 0.044845734562794115, "learning_rate": 0.00011879552097236403, "loss": 0.8482, "step": 129190 }, { "epoch": 2.2682982496181463, "grad_norm": 0.06359242211063387, "learning_rate": 0.00011878455873645285, "loss": 0.8447, "step": 129200 }, { "epoch": 2.268473814498148, "grad_norm": 0.058670792500878755, "learning_rate": 0.00011877359627520785, "loss": 0.8522, "step": 129210 }, { "epoch": 2.2686493793781493, "grad_norm": 0.054645783743175115, "learning_rate": 0.00011876263358876802, "loss": 0.8476, "step": 129220 }, { "epoch": 2.2688249442581507, "grad_norm": 0.051404881765793574, "learning_rate": 0.0001187516706772722, "loss": 0.8508, "step": 129230 }, { "epoch": 2.269000509138152, "grad_norm": 0.05039239493434629, "learning_rate": 0.00011874070754085933, "loss": 0.8301, "step": 129240 }, { "epoch": 2.2691760740181532, "grad_norm": 0.06626321855193014, "learning_rate": 0.0001187297441796683, "loss": 0.8419, "step": 129250 }, { "epoch": 2.2693516388981547, "grad_norm": 0.05344582657729159, "learning_rate": 0.00011871878059383799, "loss": 0.8385, "step": 129260 }, { "epoch": 2.269527203778156, "grad_norm": 0.05335136078793268, "learning_rate": 0.00011870781678350735, "loss": 0.8414, "step": 129270 }, { "epoch": 2.2697027686581577, "grad_norm": 0.06172758884923604, "learning_rate": 0.00011869685274881524, "loss": 0.8406, "step": 129280 }, { "epoch": 2.269878333538159, "grad_norm": 0.06618398432430601, "learning_rate": 0.00011868588848990065, "loss": 0.8472, "step": 129290 }, { "epoch": 2.27005389841816, "grad_norm": 0.06207034681398319, "learning_rate": 0.00011867492400690245, "loss": 0.8383, "step": 129300 }, { "epoch": 2.2702294632981617, "grad_norm": 0.06419387391632278, "learning_rate": 0.00011866395929995959, "loss": 0.8403, "step": 129310 }, { "epoch": 2.270405028178163, "grad_norm": 0.060039639100398626, "learning_rate": 0.00011865299436921098, "loss": 0.8485, "step": 129320 }, { "epoch": 2.2705805930581646, "grad_norm": 0.0679823066805295, "learning_rate": 0.00011864202921479552, "loss": 0.8384, "step": 129330 }, { "epoch": 2.270756157938166, "grad_norm": 0.06832794344263073, "learning_rate": 0.00011863106383685221, "loss": 0.8401, "step": 129340 }, { "epoch": 2.2709317228181676, "grad_norm": 0.06358627326632904, "learning_rate": 0.00011862009823551991, "loss": 0.8465, "step": 129350 }, { "epoch": 2.271107287698169, "grad_norm": 0.050006104767864455, "learning_rate": 0.0001186091324109376, "loss": 0.8451, "step": 129360 }, { "epoch": 2.27128285257817, "grad_norm": 0.06332275528794809, "learning_rate": 0.00011859816636324423, "loss": 0.8526, "step": 129370 }, { "epoch": 2.2714584174581716, "grad_norm": 0.045909283492552265, "learning_rate": 0.00011858720009257866, "loss": 0.8477, "step": 129380 }, { "epoch": 2.271633982338173, "grad_norm": 0.056710918397080826, "learning_rate": 0.00011857623359907996, "loss": 0.8404, "step": 129390 }, { "epoch": 2.2718095472181745, "grad_norm": 0.07986701405353673, "learning_rate": 0.00011856526688288697, "loss": 0.8467, "step": 129400 }, { "epoch": 2.271985112098176, "grad_norm": 0.058751586458308025, "learning_rate": 0.00011855429994413872, "loss": 0.8494, "step": 129410 }, { "epoch": 2.2721606769781775, "grad_norm": 0.056924664107089636, "learning_rate": 0.00011854333278297409, "loss": 0.8416, "step": 129420 }, { "epoch": 2.2723362418581785, "grad_norm": 0.05195922820050257, "learning_rate": 0.00011853236539953208, "loss": 0.8399, "step": 129430 }, { "epoch": 2.27251180673818, "grad_norm": 0.05887344258579035, "learning_rate": 0.00011852139779395165, "loss": 0.8548, "step": 129440 }, { "epoch": 2.2726873716181815, "grad_norm": 0.05113652805241019, "learning_rate": 0.00011851042996637174, "loss": 0.847, "step": 129450 }, { "epoch": 2.272862936498183, "grad_norm": 0.06754436247590408, "learning_rate": 0.00011849946191693133, "loss": 0.8387, "step": 129460 }, { "epoch": 2.2730385013781844, "grad_norm": 0.05423731463884294, "learning_rate": 0.00011848849364576937, "loss": 0.8373, "step": 129470 }, { "epoch": 2.273214066258186, "grad_norm": 0.045430220203175796, "learning_rate": 0.00011847752515302488, "loss": 0.8445, "step": 129480 }, { "epoch": 2.273389631138187, "grad_norm": 0.04471582660871143, "learning_rate": 0.00011846655643883677, "loss": 0.8398, "step": 129490 }, { "epoch": 2.2735651960181884, "grad_norm": 0.07043807658896724, "learning_rate": 0.00011845558750334405, "loss": 0.8523, "step": 129500 }, { "epoch": 2.27374076089819, "grad_norm": 0.07890608960812155, "learning_rate": 0.00011844461834668568, "loss": 0.8468, "step": 129510 }, { "epoch": 2.2739163257781914, "grad_norm": 0.0511565270027119, "learning_rate": 0.00011843364896900067, "loss": 0.8433, "step": 129520 }, { "epoch": 2.274091890658193, "grad_norm": 0.055853148133836426, "learning_rate": 0.00011842267937042797, "loss": 0.8503, "step": 129530 }, { "epoch": 2.2742674555381943, "grad_norm": 0.046149389880351464, "learning_rate": 0.0001184117095511066, "loss": 0.8381, "step": 129540 }, { "epoch": 2.274443020418196, "grad_norm": 0.07004655837883492, "learning_rate": 0.00011840073951117551, "loss": 0.8403, "step": 129550 }, { "epoch": 2.274618585298197, "grad_norm": 0.057593870100165846, "learning_rate": 0.00011838976925077374, "loss": 0.84, "step": 129560 }, { "epoch": 2.2747941501781983, "grad_norm": 0.051371258155804414, "learning_rate": 0.00011837879877004026, "loss": 0.8394, "step": 129570 }, { "epoch": 2.2749697150582, "grad_norm": 0.046230762401157026, "learning_rate": 0.00011836782806911407, "loss": 0.8401, "step": 129580 }, { "epoch": 2.2751452799382013, "grad_norm": 0.051863447218863344, "learning_rate": 0.00011835685714813414, "loss": 0.846, "step": 129590 }, { "epoch": 2.2753208448182027, "grad_norm": 0.05719602078398104, "learning_rate": 0.00011834588600723952, "loss": 0.8418, "step": 129600 }, { "epoch": 2.2754964096982038, "grad_norm": 0.06544422152431038, "learning_rate": 0.0001183349146465692, "loss": 0.8434, "step": 129610 }, { "epoch": 2.2756719745782052, "grad_norm": 0.06285023466739272, "learning_rate": 0.00011832394306626219, "loss": 0.8448, "step": 129620 }, { "epoch": 2.2758475394582067, "grad_norm": 0.044911014470704655, "learning_rate": 0.00011831297126645751, "loss": 0.8412, "step": 129630 }, { "epoch": 2.276023104338208, "grad_norm": 0.0479992437868216, "learning_rate": 0.00011830199924729417, "loss": 0.8489, "step": 129640 }, { "epoch": 2.2761986692182097, "grad_norm": 0.05117141817460023, "learning_rate": 0.00011829102700891119, "loss": 0.8429, "step": 129650 }, { "epoch": 2.276374234098211, "grad_norm": 0.06416624383069706, "learning_rate": 0.00011828005455144753, "loss": 0.8352, "step": 129660 }, { "epoch": 2.2765497989782126, "grad_norm": 0.065749076321207, "learning_rate": 0.00011826908187504231, "loss": 0.8318, "step": 129670 }, { "epoch": 2.2767253638582137, "grad_norm": 0.06279565600557396, "learning_rate": 0.00011825810897983452, "loss": 0.8514, "step": 129680 }, { "epoch": 2.276900928738215, "grad_norm": 0.05739575792114748, "learning_rate": 0.00011824713586596314, "loss": 0.8428, "step": 129690 }, { "epoch": 2.2770764936182166, "grad_norm": 0.0717804458339436, "learning_rate": 0.00011823616253356729, "loss": 0.8435, "step": 129700 }, { "epoch": 2.277252058498218, "grad_norm": 0.053729285338981386, "learning_rate": 0.00011822518898278594, "loss": 0.8504, "step": 129710 }, { "epoch": 2.2774276233782196, "grad_norm": 0.05559735601709704, "learning_rate": 0.00011821421521375815, "loss": 0.8396, "step": 129720 }, { "epoch": 2.2776031882582206, "grad_norm": 0.05150264886612537, "learning_rate": 0.00011820324122662294, "loss": 0.8505, "step": 129730 }, { "epoch": 2.277778753138222, "grad_norm": 0.047813267147626765, "learning_rate": 0.00011819226702151938, "loss": 0.846, "step": 129740 }, { "epoch": 2.2779543180182236, "grad_norm": 0.0466886593579533, "learning_rate": 0.0001181812925985865, "loss": 0.8446, "step": 129750 }, { "epoch": 2.278129882898225, "grad_norm": 0.07339992714141004, "learning_rate": 0.00011817031795796335, "loss": 0.8548, "step": 129760 }, { "epoch": 2.2783054477782265, "grad_norm": 0.06314719042527846, "learning_rate": 0.00011815934309978895, "loss": 0.8472, "step": 129770 }, { "epoch": 2.278481012658228, "grad_norm": 0.05314529959875097, "learning_rate": 0.00011814836802420244, "loss": 0.8472, "step": 129780 }, { "epoch": 2.2786565775382295, "grad_norm": 0.0514973275901852, "learning_rate": 0.00011813739273134276, "loss": 0.8448, "step": 129790 }, { "epoch": 2.2788321424182305, "grad_norm": 0.060307078092409705, "learning_rate": 0.00011812641722134908, "loss": 0.8425, "step": 129800 }, { "epoch": 2.279007707298232, "grad_norm": 0.05577619142405697, "learning_rate": 0.0001181154414943604, "loss": 0.847, "step": 129810 }, { "epoch": 2.2791832721782335, "grad_norm": 0.05057814469937263, "learning_rate": 0.00011810446555051581, "loss": 0.8495, "step": 129820 }, { "epoch": 2.279358837058235, "grad_norm": 0.06582348791286638, "learning_rate": 0.00011809348938995432, "loss": 0.8524, "step": 129830 }, { "epoch": 2.2795344019382364, "grad_norm": 0.05118899656874646, "learning_rate": 0.00011808251301281505, "loss": 0.8473, "step": 129840 }, { "epoch": 2.2797099668182375, "grad_norm": 0.04250705938769466, "learning_rate": 0.00011807153641923712, "loss": 0.8493, "step": 129850 }, { "epoch": 2.279885531698239, "grad_norm": 0.05805244291247016, "learning_rate": 0.0001180605596093595, "loss": 0.8431, "step": 129860 }, { "epoch": 2.2800610965782404, "grad_norm": 0.06231957840555816, "learning_rate": 0.00011804958258332133, "loss": 0.8383, "step": 129870 }, { "epoch": 2.280236661458242, "grad_norm": 0.05472550870322227, "learning_rate": 0.00011803860534126169, "loss": 0.8385, "step": 129880 }, { "epoch": 2.2804122263382434, "grad_norm": 0.05419459268270777, "learning_rate": 0.00011802762788331965, "loss": 0.8424, "step": 129890 }, { "epoch": 2.280587791218245, "grad_norm": 0.0632593393844415, "learning_rate": 0.0001180166502096343, "loss": 0.8478, "step": 129900 }, { "epoch": 2.2807633560982463, "grad_norm": 0.0600316395811481, "learning_rate": 0.00011800567232034472, "loss": 0.8497, "step": 129910 }, { "epoch": 2.2809389209782474, "grad_norm": 0.04796001099166595, "learning_rate": 0.00011799469421559004, "loss": 0.8543, "step": 129920 }, { "epoch": 2.281114485858249, "grad_norm": 0.049883508043356846, "learning_rate": 0.00011798371589550932, "loss": 0.8417, "step": 129930 }, { "epoch": 2.2812900507382503, "grad_norm": 0.050490564478985485, "learning_rate": 0.00011797273736024163, "loss": 0.8448, "step": 129940 }, { "epoch": 2.281465615618252, "grad_norm": 0.060264121720865725, "learning_rate": 0.00011796175860992617, "loss": 0.8427, "step": 129950 }, { "epoch": 2.2816411804982533, "grad_norm": 0.07362048454170758, "learning_rate": 0.00011795077964470191, "loss": 0.8413, "step": 129960 }, { "epoch": 2.2818167453782543, "grad_norm": 0.07903753733159112, "learning_rate": 0.00011793980046470805, "loss": 0.8493, "step": 129970 }, { "epoch": 2.2819923102582558, "grad_norm": 0.059410934266897295, "learning_rate": 0.00011792882107008367, "loss": 0.8456, "step": 129980 }, { "epoch": 2.2821678751382573, "grad_norm": 0.049542601398926175, "learning_rate": 0.00011791784146096791, "loss": 0.8511, "step": 129990 }, { "epoch": 2.2823434400182587, "grad_norm": 0.049202953978799466, "learning_rate": 0.00011790686163749984, "loss": 0.84, "step": 130000 }, { "epoch": 2.28251900489826, "grad_norm": 0.05690058778022031, "learning_rate": 0.00011789588159981857, "loss": 0.8479, "step": 130010 }, { "epoch": 2.2826945697782617, "grad_norm": 0.06180183839489033, "learning_rate": 0.00011788490134806327, "loss": 0.8439, "step": 130020 }, { "epoch": 2.282870134658263, "grad_norm": 0.06771560229280559, "learning_rate": 0.00011787392088237302, "loss": 0.8385, "step": 130030 }, { "epoch": 2.283045699538264, "grad_norm": 0.052878148800075235, "learning_rate": 0.00011786294020288694, "loss": 0.845, "step": 130040 }, { "epoch": 2.2832212644182657, "grad_norm": 0.062006793770580694, "learning_rate": 0.00011785195930974422, "loss": 0.8457, "step": 130050 }, { "epoch": 2.283396829298267, "grad_norm": 0.04911647141595334, "learning_rate": 0.00011784097820308393, "loss": 0.846, "step": 130060 }, { "epoch": 2.2835723941782686, "grad_norm": 0.044417913639716144, "learning_rate": 0.00011782999688304521, "loss": 0.8509, "step": 130070 }, { "epoch": 2.28374795905827, "grad_norm": 0.0623140963752634, "learning_rate": 0.00011781901534976718, "loss": 0.8449, "step": 130080 }, { "epoch": 2.283923523938271, "grad_norm": 0.0581410031430696, "learning_rate": 0.00011780803360338904, "loss": 0.8382, "step": 130090 }, { "epoch": 2.2840990888182726, "grad_norm": 0.04751549792219519, "learning_rate": 0.00011779705164404989, "loss": 0.8372, "step": 130100 }, { "epoch": 2.284274653698274, "grad_norm": 0.054055911529362614, "learning_rate": 0.00011778606947188885, "loss": 0.8345, "step": 130110 }, { "epoch": 2.2844502185782756, "grad_norm": 0.048129277851087934, "learning_rate": 0.00011777508708704514, "loss": 0.8483, "step": 130120 }, { "epoch": 2.284625783458277, "grad_norm": 0.10154387850153107, "learning_rate": 0.00011776410448965782, "loss": 0.8441, "step": 130130 }, { "epoch": 2.2848013483382785, "grad_norm": 0.06942838827598717, "learning_rate": 0.0001177531216798661, "loss": 0.8475, "step": 130140 }, { "epoch": 2.28497691321828, "grad_norm": 0.041660276723956065, "learning_rate": 0.0001177421386578091, "loss": 0.8493, "step": 130150 }, { "epoch": 2.285152478098281, "grad_norm": 0.04971913168920996, "learning_rate": 0.00011773115542362602, "loss": 0.8495, "step": 130160 }, { "epoch": 2.2853280429782825, "grad_norm": 0.0540441073756219, "learning_rate": 0.00011772017197745598, "loss": 0.8399, "step": 130170 }, { "epoch": 2.285503607858284, "grad_norm": 0.05517951699811016, "learning_rate": 0.00011770918831943814, "loss": 0.8491, "step": 130180 }, { "epoch": 2.2856791727382855, "grad_norm": 0.0594662062307073, "learning_rate": 0.00011769820444971171, "loss": 0.845, "step": 130190 }, { "epoch": 2.285854737618287, "grad_norm": 0.08453504144678099, "learning_rate": 0.00011768722036841582, "loss": 0.8412, "step": 130200 }, { "epoch": 2.2860303024982884, "grad_norm": 0.05319165690961686, "learning_rate": 0.00011767623607568965, "loss": 0.85, "step": 130210 }, { "epoch": 2.2862058673782895, "grad_norm": 0.06848429385570928, "learning_rate": 0.00011766525157167237, "loss": 0.8381, "step": 130220 }, { "epoch": 2.286381432258291, "grad_norm": 0.06365929768802772, "learning_rate": 0.00011765426685650316, "loss": 0.8441, "step": 130230 }, { "epoch": 2.2865569971382924, "grad_norm": 0.05679083123805447, "learning_rate": 0.00011764328193032121, "loss": 0.8397, "step": 130240 }, { "epoch": 2.286732562018294, "grad_norm": 0.05881170328216437, "learning_rate": 0.00011763229679326567, "loss": 0.8514, "step": 130250 }, { "epoch": 2.2869081268982954, "grad_norm": 0.04939913909688884, "learning_rate": 0.00011762131144547575, "loss": 0.8452, "step": 130260 }, { "epoch": 2.287083691778297, "grad_norm": 0.05299339532555847, "learning_rate": 0.00011761032588709065, "loss": 0.8463, "step": 130270 }, { "epoch": 2.287259256658298, "grad_norm": 0.05375639071371556, "learning_rate": 0.00011759934011824949, "loss": 0.8477, "step": 130280 }, { "epoch": 2.2874348215382994, "grad_norm": 0.05592721852949636, "learning_rate": 0.00011758835413909155, "loss": 0.8416, "step": 130290 }, { "epoch": 2.287610386418301, "grad_norm": 0.06963610907332314, "learning_rate": 0.00011757736794975594, "loss": 0.8427, "step": 130300 }, { "epoch": 2.2877859512983023, "grad_norm": 0.04847255716639059, "learning_rate": 0.00011756638155038193, "loss": 0.8446, "step": 130310 }, { "epoch": 2.287961516178304, "grad_norm": 0.050448783392512075, "learning_rate": 0.00011755539494110867, "loss": 0.8427, "step": 130320 }, { "epoch": 2.2881370810583053, "grad_norm": 0.04510595109748189, "learning_rate": 0.00011754440812207545, "loss": 0.8515, "step": 130330 }, { "epoch": 2.2883126459383067, "grad_norm": 0.05140627605238121, "learning_rate": 0.00011753342109342136, "loss": 0.8447, "step": 130340 }, { "epoch": 2.2884882108183078, "grad_norm": 0.05260233603945596, "learning_rate": 0.00011752243385528563, "loss": 0.8421, "step": 130350 }, { "epoch": 2.2886637756983093, "grad_norm": 0.06114952751444475, "learning_rate": 0.00011751144640780754, "loss": 0.8433, "step": 130360 }, { "epoch": 2.2888393405783107, "grad_norm": 0.047542308429475676, "learning_rate": 0.00011750045875112624, "loss": 0.8501, "step": 130370 }, { "epoch": 2.289014905458312, "grad_norm": 0.05044354449855565, "learning_rate": 0.00011748947088538095, "loss": 0.852, "step": 130380 }, { "epoch": 2.2891904703383137, "grad_norm": 0.044438386448040774, "learning_rate": 0.00011747848281071097, "loss": 0.8468, "step": 130390 }, { "epoch": 2.2893660352183147, "grad_norm": 0.057564139439678676, "learning_rate": 0.00011746749452725539, "loss": 0.8435, "step": 130400 }, { "epoch": 2.289541600098316, "grad_norm": 0.06920402807724939, "learning_rate": 0.0001174565060351535, "loss": 0.8501, "step": 130410 }, { "epoch": 2.2897171649783177, "grad_norm": 0.055609009664820186, "learning_rate": 0.00011744551733454454, "loss": 0.847, "step": 130420 }, { "epoch": 2.289892729858319, "grad_norm": 0.047779324544521905, "learning_rate": 0.00011743452842556772, "loss": 0.8469, "step": 130430 }, { "epoch": 2.2900682947383206, "grad_norm": 0.05124842457985409, "learning_rate": 0.0001174235393083623, "loss": 0.8449, "step": 130440 }, { "epoch": 2.290243859618322, "grad_norm": 0.08912957711361275, "learning_rate": 0.00011741254998306745, "loss": 0.8502, "step": 130450 }, { "epoch": 2.2904194244983236, "grad_norm": 0.07167276541809502, "learning_rate": 0.00011740156044982249, "loss": 0.8418, "step": 130460 }, { "epoch": 2.2905949893783246, "grad_norm": 0.06497985247101781, "learning_rate": 0.00011739057070876655, "loss": 0.8454, "step": 130470 }, { "epoch": 2.290770554258326, "grad_norm": 0.06648918959950612, "learning_rate": 0.00011737958076003899, "loss": 0.8486, "step": 130480 }, { "epoch": 2.2909461191383276, "grad_norm": 0.06772107189436849, "learning_rate": 0.00011736859060377901, "loss": 0.8417, "step": 130490 }, { "epoch": 2.291121684018329, "grad_norm": 0.053668656184205056, "learning_rate": 0.00011735760024012583, "loss": 0.8395, "step": 130500 }, { "epoch": 2.2912972488983305, "grad_norm": 0.07510604008406604, "learning_rate": 0.00011734660966921873, "loss": 0.8488, "step": 130510 }, { "epoch": 2.2914728137783316, "grad_norm": 0.05158789462442846, "learning_rate": 0.0001173356188911969, "loss": 0.8479, "step": 130520 }, { "epoch": 2.291648378658333, "grad_norm": 0.06470517682941958, "learning_rate": 0.00011732462790619974, "loss": 0.8477, "step": 130530 }, { "epoch": 2.2918239435383345, "grad_norm": 0.04863102973462872, "learning_rate": 0.00011731363671436635, "loss": 0.8444, "step": 130540 }, { "epoch": 2.291999508418336, "grad_norm": 0.0782203122657899, "learning_rate": 0.00011730264531583608, "loss": 0.8416, "step": 130550 }, { "epoch": 2.2921750732983375, "grad_norm": 0.05389007029814104, "learning_rate": 0.00011729165371074817, "loss": 0.8435, "step": 130560 }, { "epoch": 2.292350638178339, "grad_norm": 0.06152637476984627, "learning_rate": 0.00011728066189924187, "loss": 0.85, "step": 130570 }, { "epoch": 2.2925262030583404, "grad_norm": 0.063514584697635, "learning_rate": 0.00011726966988145646, "loss": 0.8487, "step": 130580 }, { "epoch": 2.2927017679383415, "grad_norm": 0.057610394805870366, "learning_rate": 0.00011725867765753123, "loss": 0.8394, "step": 130590 }, { "epoch": 2.292877332818343, "grad_norm": 0.04919133325890918, "learning_rate": 0.00011724768522760544, "loss": 0.8476, "step": 130600 }, { "epoch": 2.2930528976983444, "grad_norm": 0.06350863242915192, "learning_rate": 0.00011723669259181837, "loss": 0.8506, "step": 130610 }, { "epoch": 2.293228462578346, "grad_norm": 0.051005808380606664, "learning_rate": 0.00011722569975030928, "loss": 0.8395, "step": 130620 }, { "epoch": 2.2934040274583474, "grad_norm": 0.057763749670116396, "learning_rate": 0.00011721470670321748, "loss": 0.8471, "step": 130630 }, { "epoch": 2.2935795923383484, "grad_norm": 0.06863965411934685, "learning_rate": 0.0001172037134506822, "loss": 0.8404, "step": 130640 }, { "epoch": 2.29375515721835, "grad_norm": 0.07074613565051649, "learning_rate": 0.0001171927199928428, "loss": 0.8478, "step": 130650 }, { "epoch": 2.2939307220983514, "grad_norm": 0.0645219576206751, "learning_rate": 0.0001171817263298385, "loss": 0.8402, "step": 130660 }, { "epoch": 2.294106286978353, "grad_norm": 0.05282560305240269, "learning_rate": 0.0001171707324618087, "loss": 0.8455, "step": 130670 }, { "epoch": 2.2942818518583543, "grad_norm": 0.0462105874389878, "learning_rate": 0.00011715973838889256, "loss": 0.847, "step": 130680 }, { "epoch": 2.294457416738356, "grad_norm": 0.055937144814213864, "learning_rate": 0.00011714874411122943, "loss": 0.8407, "step": 130690 }, { "epoch": 2.2946329816183573, "grad_norm": 0.055192649657853685, "learning_rate": 0.00011713774962895863, "loss": 0.8448, "step": 130700 }, { "epoch": 2.2948085464983583, "grad_norm": 0.05254435924693565, "learning_rate": 0.00011712675494221945, "loss": 0.8447, "step": 130710 }, { "epoch": 2.29498411137836, "grad_norm": 0.05313263087382454, "learning_rate": 0.00011711576005115121, "loss": 0.845, "step": 130720 }, { "epoch": 2.2951596762583613, "grad_norm": 0.056783198574692655, "learning_rate": 0.00011710476495589322, "loss": 0.8399, "step": 130730 }, { "epoch": 2.2953352411383627, "grad_norm": 0.05676774145304584, "learning_rate": 0.00011709376965658472, "loss": 0.8504, "step": 130740 }, { "epoch": 2.295510806018364, "grad_norm": 0.039689195141928396, "learning_rate": 0.0001170827741533651, "loss": 0.851, "step": 130750 }, { "epoch": 2.2956863708983652, "grad_norm": 0.052842543127268445, "learning_rate": 0.00011707177844637362, "loss": 0.8512, "step": 130760 }, { "epoch": 2.2958619357783667, "grad_norm": 0.07604636442162552, "learning_rate": 0.00011706078253574968, "loss": 0.8476, "step": 130770 }, { "epoch": 2.296037500658368, "grad_norm": 0.04892689862736303, "learning_rate": 0.00011704978642163254, "loss": 0.8433, "step": 130780 }, { "epoch": 2.2962130655383697, "grad_norm": 0.08469173108161632, "learning_rate": 0.0001170387901041615, "loss": 0.8401, "step": 130790 }, { "epoch": 2.296388630418371, "grad_norm": 0.06784370279027023, "learning_rate": 0.00011702779358347593, "loss": 0.8447, "step": 130800 }, { "epoch": 2.2965641952983726, "grad_norm": 0.061389314729068514, "learning_rate": 0.00011701679685971511, "loss": 0.8479, "step": 130810 }, { "epoch": 2.296739760178374, "grad_norm": 0.05501315736259036, "learning_rate": 0.00011700579993301846, "loss": 0.8434, "step": 130820 }, { "epoch": 2.296915325058375, "grad_norm": 0.057655297129310154, "learning_rate": 0.00011699480280352522, "loss": 0.8524, "step": 130830 }, { "epoch": 2.2970908899383766, "grad_norm": 0.046803342049697265, "learning_rate": 0.0001169838054713748, "loss": 0.8528, "step": 130840 }, { "epoch": 2.297266454818378, "grad_norm": 0.048913943157529165, "learning_rate": 0.00011697280793670646, "loss": 0.8453, "step": 130850 }, { "epoch": 2.2974420196983796, "grad_norm": 0.07471874882566194, "learning_rate": 0.00011696181019965957, "loss": 0.8385, "step": 130860 }, { "epoch": 2.297617584578381, "grad_norm": 0.06041418554543387, "learning_rate": 0.00011695081226037354, "loss": 0.8364, "step": 130870 }, { "epoch": 2.2977931494583825, "grad_norm": 0.06086648925223601, "learning_rate": 0.00011693981411898762, "loss": 0.8448, "step": 130880 }, { "epoch": 2.2979687143383836, "grad_norm": 0.055335986676683185, "learning_rate": 0.00011692881577564123, "loss": 0.8389, "step": 130890 }, { "epoch": 2.298144279218385, "grad_norm": 0.10178989791892439, "learning_rate": 0.00011691781723047367, "loss": 0.8436, "step": 130900 }, { "epoch": 2.2983198440983865, "grad_norm": 0.059629490187756276, "learning_rate": 0.00011690681848362432, "loss": 0.8502, "step": 130910 }, { "epoch": 2.298495408978388, "grad_norm": 0.0651289303107902, "learning_rate": 0.00011689581953523255, "loss": 0.8429, "step": 130920 }, { "epoch": 2.2986709738583895, "grad_norm": 0.04374152360222973, "learning_rate": 0.00011688482038543767, "loss": 0.8497, "step": 130930 }, { "epoch": 2.298846538738391, "grad_norm": 0.05892319131229731, "learning_rate": 0.00011687382103437909, "loss": 0.8404, "step": 130940 }, { "epoch": 2.299022103618392, "grad_norm": 0.04978530310643424, "learning_rate": 0.00011686282148219616, "loss": 0.8408, "step": 130950 }, { "epoch": 2.2991976684983935, "grad_norm": 0.05195321263105229, "learning_rate": 0.00011685182172902822, "loss": 0.8499, "step": 130960 }, { "epoch": 2.299373233378395, "grad_norm": 0.07334988573597757, "learning_rate": 0.00011684082177501468, "loss": 0.838, "step": 130970 }, { "epoch": 2.2995487982583964, "grad_norm": 0.046647803663918706, "learning_rate": 0.00011682982162029487, "loss": 0.8409, "step": 130980 }, { "epoch": 2.299724363138398, "grad_norm": 0.08921207579025307, "learning_rate": 0.00011681882126500822, "loss": 0.8409, "step": 130990 }, { "epoch": 2.2998999280183994, "grad_norm": 0.05659291854804101, "learning_rate": 0.00011680782070929403, "loss": 0.847, "step": 131000 }, { "epoch": 2.300075492898401, "grad_norm": 0.053944679076715886, "learning_rate": 0.00011679681995329177, "loss": 0.8481, "step": 131010 }, { "epoch": 2.300251057778402, "grad_norm": 0.06009744222256656, "learning_rate": 0.00011678581899714074, "loss": 0.8434, "step": 131020 }, { "epoch": 2.3004266226584034, "grad_norm": 0.05006073568367983, "learning_rate": 0.00011677481784098037, "loss": 0.8455, "step": 131030 }, { "epoch": 2.300602187538405, "grad_norm": 0.05805742307286228, "learning_rate": 0.00011676381648495002, "loss": 0.8393, "step": 131040 }, { "epoch": 2.3007777524184063, "grad_norm": 0.09761942304386731, "learning_rate": 0.00011675281492918911, "loss": 0.8508, "step": 131050 }, { "epoch": 2.300953317298408, "grad_norm": 0.062304784438277416, "learning_rate": 0.00011674181317383704, "loss": 0.8448, "step": 131060 }, { "epoch": 2.301128882178409, "grad_norm": 0.04950995348021109, "learning_rate": 0.00011673081121903315, "loss": 0.8487, "step": 131070 }, { "epoch": 2.3013044470584103, "grad_norm": 0.06856512245848703, "learning_rate": 0.00011671980906491685, "loss": 0.8375, "step": 131080 }, { "epoch": 2.301480011938412, "grad_norm": 0.06311829583637261, "learning_rate": 0.00011670880671162759, "loss": 0.8464, "step": 131090 }, { "epoch": 2.3016555768184133, "grad_norm": 0.07822104045843159, "learning_rate": 0.00011669780415930471, "loss": 0.8441, "step": 131100 }, { "epoch": 2.3018311416984147, "grad_norm": 0.08341359681840273, "learning_rate": 0.00011668680140808765, "loss": 0.8473, "step": 131110 }, { "epoch": 2.302006706578416, "grad_norm": 0.061679289119548175, "learning_rate": 0.00011667579845811584, "loss": 0.8503, "step": 131120 }, { "epoch": 2.3021822714584177, "grad_norm": 0.05040790863303951, "learning_rate": 0.00011666479530952864, "loss": 0.8466, "step": 131130 }, { "epoch": 2.3023578363384187, "grad_norm": 0.05240311946518498, "learning_rate": 0.00011665379196246547, "loss": 0.8445, "step": 131140 }, { "epoch": 2.30253340121842, "grad_norm": 0.05030028796647428, "learning_rate": 0.00011664278841706575, "loss": 0.841, "step": 131150 }, { "epoch": 2.3027089660984217, "grad_norm": 0.06266554834575773, "learning_rate": 0.00011663178467346893, "loss": 0.8341, "step": 131160 }, { "epoch": 2.302884530978423, "grad_norm": 0.051124954481729795, "learning_rate": 0.00011662078073181437, "loss": 0.8532, "step": 131170 }, { "epoch": 2.3030600958584246, "grad_norm": 0.0634984620013231, "learning_rate": 0.00011660977659224157, "loss": 0.8443, "step": 131180 }, { "epoch": 2.3032356607384257, "grad_norm": 0.047587590116746965, "learning_rate": 0.00011659877225488989, "loss": 0.8479, "step": 131190 }, { "epoch": 2.303411225618427, "grad_norm": 0.04628180314081065, "learning_rate": 0.00011658776771989875, "loss": 0.84, "step": 131200 }, { "epoch": 2.3035867904984286, "grad_norm": 0.07793241271268357, "learning_rate": 0.00011657676298740763, "loss": 0.8497, "step": 131210 }, { "epoch": 2.30376235537843, "grad_norm": 0.05174461494623339, "learning_rate": 0.00011656575805755592, "loss": 0.8443, "step": 131220 }, { "epoch": 2.3039379202584316, "grad_norm": 0.053102632686541355, "learning_rate": 0.00011655475293048308, "loss": 0.8411, "step": 131230 }, { "epoch": 2.304113485138433, "grad_norm": 0.07300162656693761, "learning_rate": 0.00011654374760632855, "loss": 0.8434, "step": 131240 }, { "epoch": 2.3042890500184345, "grad_norm": 0.04976763921912537, "learning_rate": 0.00011653274208523175, "loss": 0.8446, "step": 131250 }, { "epoch": 2.3044646148984356, "grad_norm": 0.09929392818056601, "learning_rate": 0.00011652173636733212, "loss": 0.8391, "step": 131260 }, { "epoch": 2.304640179778437, "grad_norm": 0.05117932122731828, "learning_rate": 0.00011651073045276915, "loss": 0.8416, "step": 131270 }, { "epoch": 2.3048157446584385, "grad_norm": 0.05247397611054597, "learning_rate": 0.00011649972434168221, "loss": 0.8433, "step": 131280 }, { "epoch": 2.30499130953844, "grad_norm": 0.044199033580002114, "learning_rate": 0.00011648871803421084, "loss": 0.8513, "step": 131290 }, { "epoch": 2.3051668744184415, "grad_norm": 0.05644080909086009, "learning_rate": 0.00011647771153049443, "loss": 0.8456, "step": 131300 }, { "epoch": 2.3053424392984425, "grad_norm": 0.050575623546793534, "learning_rate": 0.00011646670483067242, "loss": 0.8471, "step": 131310 }, { "epoch": 2.305518004178444, "grad_norm": 0.05233496027446299, "learning_rate": 0.0001164556979348843, "loss": 0.8534, "step": 131320 }, { "epoch": 2.3056935690584455, "grad_norm": 0.05201036736760837, "learning_rate": 0.00011644469084326954, "loss": 0.8465, "step": 131330 }, { "epoch": 2.305869133938447, "grad_norm": 0.08620030261254873, "learning_rate": 0.00011643368355596758, "loss": 0.8424, "step": 131340 }, { "epoch": 2.3060446988184484, "grad_norm": 0.05145760583863141, "learning_rate": 0.00011642267607311792, "loss": 0.8439, "step": 131350 }, { "epoch": 2.30622026369845, "grad_norm": 0.05354654935663564, "learning_rate": 0.00011641166839485995, "loss": 0.8361, "step": 131360 }, { "epoch": 2.3063958285784514, "grad_norm": 0.048402006943001376, "learning_rate": 0.00011640066052133322, "loss": 0.8407, "step": 131370 }, { "epoch": 2.3065713934584524, "grad_norm": 0.07698078990030632, "learning_rate": 0.00011638965245267714, "loss": 0.8441, "step": 131380 }, { "epoch": 2.306746958338454, "grad_norm": 0.054625548257047345, "learning_rate": 0.00011637864418903124, "loss": 0.8428, "step": 131390 }, { "epoch": 2.3069225232184554, "grad_norm": 0.05450211837190775, "learning_rate": 0.00011636763573053498, "loss": 0.8414, "step": 131400 }, { "epoch": 2.307098088098457, "grad_norm": 0.05890585086277362, "learning_rate": 0.00011635662707732782, "loss": 0.8403, "step": 131410 }, { "epoch": 2.3072736529784583, "grad_norm": 0.04967149216693062, "learning_rate": 0.00011634561822954923, "loss": 0.8413, "step": 131420 }, { "epoch": 2.3074492178584594, "grad_norm": 0.05275346838447762, "learning_rate": 0.00011633460918733874, "loss": 0.8539, "step": 131430 }, { "epoch": 2.307624782738461, "grad_norm": 0.04216600427002104, "learning_rate": 0.00011632359995083581, "loss": 0.851, "step": 131440 }, { "epoch": 2.3078003476184623, "grad_norm": 0.06382032780914622, "learning_rate": 0.00011631259052017994, "loss": 0.8413, "step": 131450 }, { "epoch": 2.307975912498464, "grad_norm": 0.06372946525137596, "learning_rate": 0.0001163015808955106, "loss": 0.8487, "step": 131460 }, { "epoch": 2.3081514773784653, "grad_norm": 0.041429792205550715, "learning_rate": 0.00011629057107696728, "loss": 0.8373, "step": 131470 }, { "epoch": 2.3083270422584667, "grad_norm": 0.053559895566608194, "learning_rate": 0.00011627956106468953, "loss": 0.8461, "step": 131480 }, { "epoch": 2.308502607138468, "grad_norm": 0.05381463775422348, "learning_rate": 0.00011626855085881678, "loss": 0.8502, "step": 131490 }, { "epoch": 2.3086781720184693, "grad_norm": 0.06958233597383069, "learning_rate": 0.0001162575404594886, "loss": 0.8497, "step": 131500 }, { "epoch": 2.3088537368984707, "grad_norm": 0.06608289626758987, "learning_rate": 0.00011624652986684444, "loss": 0.8457, "step": 131510 }, { "epoch": 2.309029301778472, "grad_norm": 0.048163294898753546, "learning_rate": 0.00011623551908102384, "loss": 0.8439, "step": 131520 }, { "epoch": 2.3092048666584737, "grad_norm": 0.060141934038349915, "learning_rate": 0.00011622450810216625, "loss": 0.8416, "step": 131530 }, { "epoch": 2.309380431538475, "grad_norm": 0.05694419463382744, "learning_rate": 0.00011621349693041124, "loss": 0.8445, "step": 131540 }, { "epoch": 2.3095559964184766, "grad_norm": 0.05867368916359734, "learning_rate": 0.00011620248556589834, "loss": 0.847, "step": 131550 }, { "epoch": 2.3097315612984777, "grad_norm": 0.05300386029993695, "learning_rate": 0.000116191474008767, "loss": 0.8453, "step": 131560 }, { "epoch": 2.309907126178479, "grad_norm": 0.05899056874402319, "learning_rate": 0.00011618046225915679, "loss": 0.8508, "step": 131570 }, { "epoch": 2.3100826910584806, "grad_norm": 0.052619804211176256, "learning_rate": 0.00011616945031720723, "loss": 0.8437, "step": 131580 }, { "epoch": 2.310258255938482, "grad_norm": 0.05212674290772514, "learning_rate": 0.00011615843818305779, "loss": 0.843, "step": 131590 }, { "epoch": 2.3104338208184836, "grad_norm": 0.07493472543276285, "learning_rate": 0.00011614742585684807, "loss": 0.8409, "step": 131600 }, { "epoch": 2.310609385698485, "grad_norm": 0.0432054420620501, "learning_rate": 0.00011613641333871752, "loss": 0.8505, "step": 131610 }, { "epoch": 2.310784950578486, "grad_norm": 0.05055575162189773, "learning_rate": 0.00011612540062880574, "loss": 0.8436, "step": 131620 }, { "epoch": 2.3109605154584876, "grad_norm": 0.04942214384567626, "learning_rate": 0.00011611438772725225, "loss": 0.859, "step": 131630 }, { "epoch": 2.311136080338489, "grad_norm": 0.06055115115229979, "learning_rate": 0.00011610337463419653, "loss": 0.8484, "step": 131640 }, { "epoch": 2.3113116452184905, "grad_norm": 0.056671205739245924, "learning_rate": 0.00011609236134977818, "loss": 0.844, "step": 131650 }, { "epoch": 2.311487210098492, "grad_norm": 0.04976004681135234, "learning_rate": 0.00011608134787413671, "loss": 0.8338, "step": 131660 }, { "epoch": 2.3116627749784935, "grad_norm": 0.05068612885324807, "learning_rate": 0.00011607033420741171, "loss": 0.8488, "step": 131670 }, { "epoch": 2.3118383398584945, "grad_norm": 0.05696221287897958, "learning_rate": 0.00011605932034974264, "loss": 0.8509, "step": 131680 }, { "epoch": 2.312013904738496, "grad_norm": 0.06330975878779427, "learning_rate": 0.00011604830630126912, "loss": 0.8508, "step": 131690 }, { "epoch": 2.3121894696184975, "grad_norm": 0.0636138533330906, "learning_rate": 0.00011603729206213068, "loss": 0.8532, "step": 131700 }, { "epoch": 2.312365034498499, "grad_norm": 0.05393596369221491, "learning_rate": 0.00011602627763246682, "loss": 0.8457, "step": 131710 }, { "epoch": 2.3125405993785004, "grad_norm": 0.06471600158981274, "learning_rate": 0.0001160152630124172, "loss": 0.8572, "step": 131720 }, { "epoch": 2.312716164258502, "grad_norm": 0.05520742819875494, "learning_rate": 0.0001160042482021213, "loss": 0.8468, "step": 131730 }, { "epoch": 2.312891729138503, "grad_norm": 0.06825510427903188, "learning_rate": 0.0001159932332017187, "loss": 0.8451, "step": 131740 }, { "epoch": 2.3130672940185044, "grad_norm": 0.0491157910626316, "learning_rate": 0.00011598221801134897, "loss": 0.8412, "step": 131750 }, { "epoch": 2.313242858898506, "grad_norm": 0.051565602617125096, "learning_rate": 0.00011597120263115165, "loss": 0.8513, "step": 131760 }, { "epoch": 2.3134184237785074, "grad_norm": 0.07641030932102126, "learning_rate": 0.00011596018706126635, "loss": 0.845, "step": 131770 }, { "epoch": 2.313593988658509, "grad_norm": 0.08329072010095231, "learning_rate": 0.00011594917130183257, "loss": 0.8422, "step": 131780 }, { "epoch": 2.3137695535385103, "grad_norm": 0.0678388551972083, "learning_rate": 0.00011593815535298995, "loss": 0.8426, "step": 131790 }, { "epoch": 2.313945118418512, "grad_norm": 0.05041484969839633, "learning_rate": 0.00011592713921487806, "loss": 0.8527, "step": 131800 }, { "epoch": 2.314120683298513, "grad_norm": 0.06892421193249482, "learning_rate": 0.0001159161228876364, "loss": 0.8453, "step": 131810 }, { "epoch": 2.3142962481785143, "grad_norm": 0.04785766535020726, "learning_rate": 0.00011590510637140464, "loss": 0.8441, "step": 131820 }, { "epoch": 2.314471813058516, "grad_norm": 0.06556700112635146, "learning_rate": 0.00011589408966632234, "loss": 0.84, "step": 131830 }, { "epoch": 2.3146473779385173, "grad_norm": 0.06004423996784591, "learning_rate": 0.00011588307277252905, "loss": 0.8446, "step": 131840 }, { "epoch": 2.3148229428185187, "grad_norm": 0.04947966987789711, "learning_rate": 0.00011587205569016434, "loss": 0.8456, "step": 131850 }, { "epoch": 2.31499850769852, "grad_norm": 0.06713408611553574, "learning_rate": 0.00011586103841936793, "loss": 0.839, "step": 131860 }, { "epoch": 2.3151740725785213, "grad_norm": 0.06586212257832785, "learning_rate": 0.00011585002096027923, "loss": 0.8492, "step": 131870 }, { "epoch": 2.3153496374585227, "grad_norm": 0.06067289947614677, "learning_rate": 0.00011583900331303794, "loss": 0.8376, "step": 131880 }, { "epoch": 2.315525202338524, "grad_norm": 0.06901772268142187, "learning_rate": 0.00011582798547778366, "loss": 0.8415, "step": 131890 }, { "epoch": 2.3157007672185257, "grad_norm": 0.06982702721959523, "learning_rate": 0.00011581696745465593, "loss": 0.8341, "step": 131900 }, { "epoch": 2.315876332098527, "grad_norm": 0.071141246301235, "learning_rate": 0.0001158059492437944, "loss": 0.8442, "step": 131910 }, { "epoch": 2.3160518969785286, "grad_norm": 0.0628158616138464, "learning_rate": 0.00011579493084533866, "loss": 0.8364, "step": 131920 }, { "epoch": 2.3162274618585297, "grad_norm": 0.05774178562023637, "learning_rate": 0.00011578391225942826, "loss": 0.8404, "step": 131930 }, { "epoch": 2.316403026738531, "grad_norm": 0.05739360771457989, "learning_rate": 0.00011577289348620293, "loss": 0.8512, "step": 131940 }, { "epoch": 2.3165785916185326, "grad_norm": 0.0658199956845853, "learning_rate": 0.00011576187452580219, "loss": 0.8434, "step": 131950 }, { "epoch": 2.316754156498534, "grad_norm": 0.06106743794143059, "learning_rate": 0.00011575085537836565, "loss": 0.8417, "step": 131960 }, { "epoch": 2.3169297213785356, "grad_norm": 0.06365095463554989, "learning_rate": 0.00011573983604403295, "loss": 0.8512, "step": 131970 }, { "epoch": 2.3171052862585366, "grad_norm": 0.05166128641627272, "learning_rate": 0.0001157288165229437, "loss": 0.8423, "step": 131980 }, { "epoch": 2.317280851138538, "grad_norm": 0.07302910901052091, "learning_rate": 0.00011571779681523753, "loss": 0.844, "step": 131990 }, { "epoch": 2.3174564160185396, "grad_norm": 0.07233245301816965, "learning_rate": 0.00011570677692105405, "loss": 0.8497, "step": 132000 }, { "epoch": 2.317631980898541, "grad_norm": 0.06819798405951592, "learning_rate": 0.00011569575684053288, "loss": 0.8467, "step": 132010 }, { "epoch": 2.3178075457785425, "grad_norm": 0.05834454112901788, "learning_rate": 0.00011568473657381367, "loss": 0.8484, "step": 132020 }, { "epoch": 2.317983110658544, "grad_norm": 0.0431875702218534, "learning_rate": 0.00011567371612103603, "loss": 0.8414, "step": 132030 }, { "epoch": 2.3181586755385455, "grad_norm": 0.05557181398255899, "learning_rate": 0.00011566269548233958, "loss": 0.8386, "step": 132040 }, { "epoch": 2.3183342404185465, "grad_norm": 0.05272378529331595, "learning_rate": 0.00011565167465786399, "loss": 0.8471, "step": 132050 }, { "epoch": 2.318509805298548, "grad_norm": 0.038610718046955685, "learning_rate": 0.00011564065364774885, "loss": 0.8509, "step": 132060 }, { "epoch": 2.3186853701785495, "grad_norm": 0.05050281178718943, "learning_rate": 0.00011562963245213382, "loss": 0.8424, "step": 132070 }, { "epoch": 2.318860935058551, "grad_norm": 0.05493230583810596, "learning_rate": 0.00011561861107115859, "loss": 0.8446, "step": 132080 }, { "epoch": 2.3190364999385524, "grad_norm": 0.055919919139637696, "learning_rate": 0.00011560758950496271, "loss": 0.8446, "step": 132090 }, { "epoch": 2.3192120648185535, "grad_norm": 0.058064895337602826, "learning_rate": 0.00011559656775368588, "loss": 0.8488, "step": 132100 }, { "epoch": 2.319387629698555, "grad_norm": 0.06463800397202103, "learning_rate": 0.00011558554581746776, "loss": 0.8412, "step": 132110 }, { "epoch": 2.3195631945785564, "grad_norm": 0.06499608428821546, "learning_rate": 0.00011557452369644794, "loss": 0.836, "step": 132120 }, { "epoch": 2.319738759458558, "grad_norm": 0.05027817301085774, "learning_rate": 0.00011556350139076618, "loss": 0.8434, "step": 132130 }, { "epoch": 2.3199143243385594, "grad_norm": 0.06827940621544407, "learning_rate": 0.000115552478900562, "loss": 0.8418, "step": 132140 }, { "epoch": 2.320089889218561, "grad_norm": 0.04965600026573296, "learning_rate": 0.00011554145622597517, "loss": 0.834, "step": 132150 }, { "epoch": 2.3202654540985623, "grad_norm": 0.04652094177654587, "learning_rate": 0.00011553043336714528, "loss": 0.8444, "step": 132160 }, { "epoch": 2.3204410189785634, "grad_norm": 0.07212004975976503, "learning_rate": 0.00011551941032421199, "loss": 0.8437, "step": 132170 }, { "epoch": 2.320616583858565, "grad_norm": 0.052988721666986, "learning_rate": 0.00011550838709731502, "loss": 0.8416, "step": 132180 }, { "epoch": 2.3207921487385663, "grad_norm": 0.05455026552565, "learning_rate": 0.00011549736368659398, "loss": 0.851, "step": 132190 }, { "epoch": 2.320967713618568, "grad_norm": 0.04906657987831084, "learning_rate": 0.00011548634009218862, "loss": 0.8459, "step": 132200 }, { "epoch": 2.3211432784985693, "grad_norm": 0.05513239000971743, "learning_rate": 0.00011547531631423852, "loss": 0.8534, "step": 132210 }, { "epoch": 2.3213188433785703, "grad_norm": 0.06401149443933428, "learning_rate": 0.00011546429235288333, "loss": 0.842, "step": 132220 }, { "epoch": 2.321494408258572, "grad_norm": 0.06840071081576128, "learning_rate": 0.00011545326820826286, "loss": 0.8389, "step": 132230 }, { "epoch": 2.3216699731385733, "grad_norm": 0.05456158680588138, "learning_rate": 0.00011544224388051668, "loss": 0.846, "step": 132240 }, { "epoch": 2.3218455380185747, "grad_norm": 0.055083135178495356, "learning_rate": 0.00011543121936978452, "loss": 0.8438, "step": 132250 }, { "epoch": 2.322021102898576, "grad_norm": 0.04100271933025384, "learning_rate": 0.00011542019467620607, "loss": 0.8539, "step": 132260 }, { "epoch": 2.3221966677785777, "grad_norm": 0.07141339188047006, "learning_rate": 0.00011540916979992093, "loss": 0.8411, "step": 132270 }, { "epoch": 2.322372232658579, "grad_norm": 0.06038486541640014, "learning_rate": 0.0001153981447410689, "loss": 0.8459, "step": 132280 }, { "epoch": 2.32254779753858, "grad_norm": 0.049495341874494676, "learning_rate": 0.00011538711949978955, "loss": 0.8464, "step": 132290 }, { "epoch": 2.3227233624185817, "grad_norm": 0.0408329699430231, "learning_rate": 0.0001153760940762227, "loss": 0.8536, "step": 132300 }, { "epoch": 2.322898927298583, "grad_norm": 0.059803981773137026, "learning_rate": 0.00011536506847050797, "loss": 0.8438, "step": 132310 }, { "epoch": 2.3230744921785846, "grad_norm": 0.04543362482275578, "learning_rate": 0.00011535404268278508, "loss": 0.8433, "step": 132320 }, { "epoch": 2.323250057058586, "grad_norm": 0.04925225294921224, "learning_rate": 0.00011534301671319368, "loss": 0.8456, "step": 132330 }, { "epoch": 2.3234256219385876, "grad_norm": 0.06175679926016005, "learning_rate": 0.00011533199056187349, "loss": 0.8452, "step": 132340 }, { "epoch": 2.3236011868185886, "grad_norm": 0.056562169796854064, "learning_rate": 0.0001153209642289643, "loss": 0.852, "step": 132350 }, { "epoch": 2.32377675169859, "grad_norm": 0.061940195436966466, "learning_rate": 0.0001153099377146057, "loss": 0.8366, "step": 132360 }, { "epoch": 2.3239523165785916, "grad_norm": 0.04516669425689456, "learning_rate": 0.0001152989110189375, "loss": 0.8436, "step": 132370 }, { "epoch": 2.324127881458593, "grad_norm": 0.05769578225247036, "learning_rate": 0.00011528788414209932, "loss": 0.8474, "step": 132380 }, { "epoch": 2.3243034463385945, "grad_norm": 0.0453077633946414, "learning_rate": 0.00011527685708423088, "loss": 0.8474, "step": 132390 }, { "epoch": 2.324479011218596, "grad_norm": 0.0547638203209841, "learning_rate": 0.00011526582984547199, "loss": 0.8542, "step": 132400 }, { "epoch": 2.324654576098597, "grad_norm": 0.047717822971689285, "learning_rate": 0.00011525480242596226, "loss": 0.8436, "step": 132410 }, { "epoch": 2.3248301409785985, "grad_norm": 0.055083936102855176, "learning_rate": 0.00011524377482584146, "loss": 0.8548, "step": 132420 }, { "epoch": 2.3250057058586, "grad_norm": 0.04923469444673471, "learning_rate": 0.00011523274704524931, "loss": 0.8387, "step": 132430 }, { "epoch": 2.3251812707386015, "grad_norm": 0.052030749926143975, "learning_rate": 0.0001152217190843255, "loss": 0.8412, "step": 132440 }, { "epoch": 2.325356835618603, "grad_norm": 0.052971714579689626, "learning_rate": 0.0001152106909432098, "loss": 0.8418, "step": 132450 }, { "epoch": 2.3255324004986044, "grad_norm": 0.05968021151820387, "learning_rate": 0.00011519966262204193, "loss": 0.8473, "step": 132460 }, { "epoch": 2.325707965378606, "grad_norm": 0.06798298109683613, "learning_rate": 0.00011518863412096164, "loss": 0.8477, "step": 132470 }, { "epoch": 2.325883530258607, "grad_norm": 0.05599923491006632, "learning_rate": 0.00011517760544010861, "loss": 0.8489, "step": 132480 }, { "epoch": 2.3260590951386084, "grad_norm": 0.08324341160803578, "learning_rate": 0.0001151665765796226, "loss": 0.8362, "step": 132490 }, { "epoch": 2.32623466001861, "grad_norm": 0.0562488785905125, "learning_rate": 0.00011515554753964332, "loss": 0.8452, "step": 132500 }, { "epoch": 2.3264102248986114, "grad_norm": 0.04966065608802306, "learning_rate": 0.00011514451832031056, "loss": 0.8376, "step": 132510 }, { "epoch": 2.326585789778613, "grad_norm": 0.04804761078497362, "learning_rate": 0.00011513348892176407, "loss": 0.8461, "step": 132520 }, { "epoch": 2.326761354658614, "grad_norm": 0.05230637811647526, "learning_rate": 0.00011512245934414355, "loss": 0.8438, "step": 132530 }, { "epoch": 2.3269369195386154, "grad_norm": 0.06412457844151244, "learning_rate": 0.00011511142958758876, "loss": 0.8448, "step": 132540 }, { "epoch": 2.327112484418617, "grad_norm": 0.047413347292195596, "learning_rate": 0.00011510039965223946, "loss": 0.8502, "step": 132550 }, { "epoch": 2.3272880492986183, "grad_norm": 0.06002771062709836, "learning_rate": 0.00011508936953823538, "loss": 0.8495, "step": 132560 }, { "epoch": 2.32746361417862, "grad_norm": 0.049832368496222884, "learning_rate": 0.00011507833924571629, "loss": 0.8428, "step": 132570 }, { "epoch": 2.3276391790586213, "grad_norm": 0.05004195474144791, "learning_rate": 0.00011506730877482194, "loss": 0.8509, "step": 132580 }, { "epoch": 2.3278147439386228, "grad_norm": 0.05154846514265575, "learning_rate": 0.0001150562781256921, "loss": 0.8476, "step": 132590 }, { "epoch": 2.327990308818624, "grad_norm": 0.0619546505158028, "learning_rate": 0.00011504524729846656, "loss": 0.8508, "step": 132600 }, { "epoch": 2.3281658736986253, "grad_norm": 0.08696245302103729, "learning_rate": 0.000115034216293285, "loss": 0.8371, "step": 132610 }, { "epoch": 2.3283414385786267, "grad_norm": 0.06489835268600537, "learning_rate": 0.00011502318511028722, "loss": 0.846, "step": 132620 }, { "epoch": 2.328517003458628, "grad_norm": 0.05941380407014737, "learning_rate": 0.000115012153749613, "loss": 0.8468, "step": 132630 }, { "epoch": 2.3286925683386297, "grad_norm": 0.06344944910140864, "learning_rate": 0.00011500112221140214, "loss": 0.8426, "step": 132640 }, { "epoch": 2.3288681332186307, "grad_norm": 0.06585934896664666, "learning_rate": 0.00011499009049579437, "loss": 0.8435, "step": 132650 }, { "epoch": 2.329043698098632, "grad_norm": 0.0456630681738946, "learning_rate": 0.00011497905860292947, "loss": 0.842, "step": 132660 }, { "epoch": 2.3292192629786337, "grad_norm": 0.04903786987446931, "learning_rate": 0.0001149680265329472, "loss": 0.8457, "step": 132670 }, { "epoch": 2.329394827858635, "grad_norm": 0.049532543840952216, "learning_rate": 0.00011495699428598737, "loss": 0.8417, "step": 132680 }, { "epoch": 2.3295703927386366, "grad_norm": 0.06772521262005636, "learning_rate": 0.00011494596186218973, "loss": 0.8436, "step": 132690 }, { "epoch": 2.329745957618638, "grad_norm": 0.05811248080223062, "learning_rate": 0.00011493492926169409, "loss": 0.8424, "step": 132700 }, { "epoch": 2.3299215224986396, "grad_norm": 0.04837732320883386, "learning_rate": 0.00011492389648464026, "loss": 0.8421, "step": 132710 }, { "epoch": 2.3300970873786406, "grad_norm": 0.05100891972778473, "learning_rate": 0.00011491286353116795, "loss": 0.8495, "step": 132720 }, { "epoch": 2.330272652258642, "grad_norm": 0.0683264885333336, "learning_rate": 0.000114901830401417, "loss": 0.8477, "step": 132730 }, { "epoch": 2.3304482171386436, "grad_norm": 0.04729073252840122, "learning_rate": 0.00011489079709552722, "loss": 0.8541, "step": 132740 }, { "epoch": 2.330623782018645, "grad_norm": 0.08101548497065235, "learning_rate": 0.00011487976361363834, "loss": 0.8458, "step": 132750 }, { "epoch": 2.3307993468986465, "grad_norm": 0.047928862878028944, "learning_rate": 0.00011486872995589022, "loss": 0.8483, "step": 132760 }, { "epoch": 2.3309749117786476, "grad_norm": 0.04819535322127413, "learning_rate": 0.00011485769612242263, "loss": 0.8469, "step": 132770 }, { "epoch": 2.331150476658649, "grad_norm": 0.056666192873380754, "learning_rate": 0.00011484666211337536, "loss": 0.8364, "step": 132780 }, { "epoch": 2.3313260415386505, "grad_norm": 0.061740148220989235, "learning_rate": 0.00011483562792888827, "loss": 0.8395, "step": 132790 }, { "epoch": 2.331501606418652, "grad_norm": 0.04939167455822436, "learning_rate": 0.00011482459356910107, "loss": 0.8446, "step": 132800 }, { "epoch": 2.3316771712986535, "grad_norm": 0.0773360337866345, "learning_rate": 0.00011481355903415364, "loss": 0.8422, "step": 132810 }, { "epoch": 2.331852736178655, "grad_norm": 0.08080058169879259, "learning_rate": 0.00011480252432418578, "loss": 0.8412, "step": 132820 }, { "epoch": 2.3320283010586564, "grad_norm": 0.05344320363584502, "learning_rate": 0.0001147914894393373, "loss": 0.8422, "step": 132830 }, { "epoch": 2.3322038659386575, "grad_norm": 0.06337534477077497, "learning_rate": 0.00011478045437974797, "loss": 0.8448, "step": 132840 }, { "epoch": 2.332379430818659, "grad_norm": 0.06409298560970393, "learning_rate": 0.00011476941914555766, "loss": 0.8438, "step": 132850 }, { "epoch": 2.3325549956986604, "grad_norm": 0.08525732620868846, "learning_rate": 0.00011475838373690617, "loss": 0.8426, "step": 132860 }, { "epoch": 2.332730560578662, "grad_norm": 0.07036262246586462, "learning_rate": 0.00011474734815393333, "loss": 0.8381, "step": 132870 }, { "epoch": 2.3329061254586634, "grad_norm": 0.05940171872565286, "learning_rate": 0.00011473631239677897, "loss": 0.8372, "step": 132880 }, { "epoch": 2.3330816903386644, "grad_norm": 0.050642922778728366, "learning_rate": 0.00011472527646558286, "loss": 0.8449, "step": 132890 }, { "epoch": 2.333257255218666, "grad_norm": 0.045409123148277136, "learning_rate": 0.00011471424036048488, "loss": 0.8445, "step": 132900 }, { "epoch": 2.3334328200986674, "grad_norm": 0.07070525988953362, "learning_rate": 0.00011470320408162487, "loss": 0.841, "step": 132910 }, { "epoch": 2.333608384978669, "grad_norm": 0.05464269809761765, "learning_rate": 0.00011469216762914259, "loss": 0.8438, "step": 132920 }, { "epoch": 2.3337839498586703, "grad_norm": 0.0528767920599999, "learning_rate": 0.00011468113100317796, "loss": 0.8404, "step": 132930 }, { "epoch": 2.333959514738672, "grad_norm": 0.07308962953126398, "learning_rate": 0.00011467009420387078, "loss": 0.8511, "step": 132940 }, { "epoch": 2.3341350796186733, "grad_norm": 0.057043019355783625, "learning_rate": 0.00011465905723136086, "loss": 0.8447, "step": 132950 }, { "epoch": 2.3343106444986743, "grad_norm": 0.04589732737653315, "learning_rate": 0.0001146480200857881, "loss": 0.8385, "step": 132960 }, { "epoch": 2.334486209378676, "grad_norm": 0.05946600518766739, "learning_rate": 0.00011463698276729226, "loss": 0.8477, "step": 132970 }, { "epoch": 2.3346617742586773, "grad_norm": 0.06391689425084605, "learning_rate": 0.00011462594527601327, "loss": 0.8451, "step": 132980 }, { "epoch": 2.3348373391386787, "grad_norm": 0.0663536122153661, "learning_rate": 0.00011461490761209098, "loss": 0.8481, "step": 132990 }, { "epoch": 2.3350129040186802, "grad_norm": 0.04891507863726666, "learning_rate": 0.00011460386977566515, "loss": 0.8446, "step": 133000 }, { "epoch": 2.3351884688986817, "grad_norm": 0.05408297511992526, "learning_rate": 0.0001145928317668757, "loss": 0.8397, "step": 133010 }, { "epoch": 2.3353640337786827, "grad_norm": 0.06407483894065841, "learning_rate": 0.00011458179358586244, "loss": 0.8516, "step": 133020 }, { "epoch": 2.335539598658684, "grad_norm": 0.055193460072403645, "learning_rate": 0.00011457075523276528, "loss": 0.8312, "step": 133030 }, { "epoch": 2.3357151635386857, "grad_norm": 0.06625849641814638, "learning_rate": 0.00011455971670772401, "loss": 0.8444, "step": 133040 }, { "epoch": 2.335890728418687, "grad_norm": 0.04988375664156093, "learning_rate": 0.0001145486780108786, "loss": 0.849, "step": 133050 }, { "epoch": 2.3360662932986886, "grad_norm": 0.04917992404367163, "learning_rate": 0.00011453763914236881, "loss": 0.8374, "step": 133060 }, { "epoch": 2.33624185817869, "grad_norm": 0.09263533250985777, "learning_rate": 0.00011452660010233452, "loss": 0.8471, "step": 133070 }, { "epoch": 2.336417423058691, "grad_norm": 0.062406845558656006, "learning_rate": 0.00011451556089091562, "loss": 0.8401, "step": 133080 }, { "epoch": 2.3365929879386926, "grad_norm": 0.05727356816131072, "learning_rate": 0.00011450452150825197, "loss": 0.8437, "step": 133090 }, { "epoch": 2.336768552818694, "grad_norm": 0.05377210058162804, "learning_rate": 0.00011449348195448349, "loss": 0.8529, "step": 133100 }, { "epoch": 2.3369441176986956, "grad_norm": 0.05609071246329465, "learning_rate": 0.00011448244222974995, "loss": 0.847, "step": 133110 }, { "epoch": 2.337119682578697, "grad_norm": 0.06648338747308315, "learning_rate": 0.0001144714023341913, "loss": 0.8498, "step": 133120 }, { "epoch": 2.3372952474586985, "grad_norm": 0.05363243458670433, "learning_rate": 0.00011446036226794741, "loss": 0.8546, "step": 133130 }, { "epoch": 2.3374708123386996, "grad_norm": 0.0551868755198827, "learning_rate": 0.00011444932203115814, "loss": 0.8453, "step": 133140 }, { "epoch": 2.337646377218701, "grad_norm": 0.04560405110848379, "learning_rate": 0.00011443828162396341, "loss": 0.8512, "step": 133150 }, { "epoch": 2.3378219420987025, "grad_norm": 0.05840707024417413, "learning_rate": 0.00011442724104650306, "loss": 0.8443, "step": 133160 }, { "epoch": 2.337997506978704, "grad_norm": 0.043720938290349544, "learning_rate": 0.000114416200298917, "loss": 0.8461, "step": 133170 }, { "epoch": 2.3381730718587055, "grad_norm": 0.08459815444522543, "learning_rate": 0.00011440515938134509, "loss": 0.8445, "step": 133180 }, { "epoch": 2.338348636738707, "grad_norm": 0.06729653270687617, "learning_rate": 0.00011439411829392723, "loss": 0.8451, "step": 133190 }, { "epoch": 2.338524201618708, "grad_norm": 0.07184692595620784, "learning_rate": 0.00011438307703680336, "loss": 0.8452, "step": 133200 }, { "epoch": 2.3386997664987095, "grad_norm": 0.05164048398122927, "learning_rate": 0.00011437203561011331, "loss": 0.8483, "step": 133210 }, { "epoch": 2.338875331378711, "grad_norm": 0.05463675442979536, "learning_rate": 0.00011436099401399705, "loss": 0.8476, "step": 133220 }, { "epoch": 2.3390508962587124, "grad_norm": 0.04317434346770487, "learning_rate": 0.00011434995224859439, "loss": 0.8471, "step": 133230 }, { "epoch": 2.339226461138714, "grad_norm": 0.062031145240477176, "learning_rate": 0.00011433891031404528, "loss": 0.8356, "step": 133240 }, { "epoch": 2.3394020260187154, "grad_norm": 0.06520978768851769, "learning_rate": 0.00011432786821048965, "loss": 0.845, "step": 133250 }, { "epoch": 2.339577590898717, "grad_norm": 0.06514406993847237, "learning_rate": 0.00011431682593806734, "loss": 0.845, "step": 133260 }, { "epoch": 2.339753155778718, "grad_norm": 0.05417855864686621, "learning_rate": 0.00011430578349691832, "loss": 0.853, "step": 133270 }, { "epoch": 2.3399287206587194, "grad_norm": 0.05043245664462972, "learning_rate": 0.00011429474088718246, "loss": 0.8435, "step": 133280 }, { "epoch": 2.340104285538721, "grad_norm": 0.07489475626823756, "learning_rate": 0.00011428369810899968, "loss": 0.8387, "step": 133290 }, { "epoch": 2.3402798504187223, "grad_norm": 0.04874703184042555, "learning_rate": 0.00011427265516250993, "loss": 0.8403, "step": 133300 }, { "epoch": 2.340455415298724, "grad_norm": 0.06425544947963778, "learning_rate": 0.00011426161204785304, "loss": 0.8377, "step": 133310 }, { "epoch": 2.340630980178725, "grad_norm": 0.0578164646124323, "learning_rate": 0.00011425056876516903, "loss": 0.8435, "step": 133320 }, { "epoch": 2.3408065450587263, "grad_norm": 0.05192584398837824, "learning_rate": 0.00011423952531459776, "loss": 0.845, "step": 133330 }, { "epoch": 2.340982109938728, "grad_norm": 0.05136040476776188, "learning_rate": 0.00011422848169627918, "loss": 0.8518, "step": 133340 }, { "epoch": 2.3411576748187293, "grad_norm": 0.0717519613528112, "learning_rate": 0.0001142174379103532, "loss": 0.8485, "step": 133350 }, { "epoch": 2.3413332396987308, "grad_norm": 0.07435547980886908, "learning_rate": 0.00011420639395695972, "loss": 0.8429, "step": 133360 }, { "epoch": 2.3415088045787322, "grad_norm": 0.05402175982884939, "learning_rate": 0.00011419534983623874, "loss": 0.8483, "step": 133370 }, { "epoch": 2.3416843694587337, "grad_norm": 0.05020869648630097, "learning_rate": 0.0001141843055483301, "loss": 0.8477, "step": 133380 }, { "epoch": 2.3418599343387347, "grad_norm": 0.09396413716224337, "learning_rate": 0.00011417326109337383, "loss": 0.8545, "step": 133390 }, { "epoch": 2.342035499218736, "grad_norm": 0.04761332011705497, "learning_rate": 0.00011416221647150978, "loss": 0.8435, "step": 133400 }, { "epoch": 2.3422110640987377, "grad_norm": 0.05903690187175423, "learning_rate": 0.00011415117168287793, "loss": 0.8442, "step": 133410 }, { "epoch": 2.342386628978739, "grad_norm": 0.05478946130131419, "learning_rate": 0.00011414012672761824, "loss": 0.844, "step": 133420 }, { "epoch": 2.3425621938587406, "grad_norm": 0.057047027069992824, "learning_rate": 0.0001141290816058706, "loss": 0.8438, "step": 133430 }, { "epoch": 2.3427377587387417, "grad_norm": 0.07901534477583794, "learning_rate": 0.00011411803631777501, "loss": 0.8516, "step": 133440 }, { "epoch": 2.342913323618743, "grad_norm": 0.05150256974856414, "learning_rate": 0.00011410699086347136, "loss": 0.8451, "step": 133450 }, { "epoch": 2.3430888884987446, "grad_norm": 0.05341598865885184, "learning_rate": 0.00011409594524309961, "loss": 0.8437, "step": 133460 }, { "epoch": 2.343264453378746, "grad_norm": 0.05743868420471526, "learning_rate": 0.00011408489945679975, "loss": 0.8437, "step": 133470 }, { "epoch": 2.3434400182587476, "grad_norm": 0.05313064148828204, "learning_rate": 0.0001140738535047117, "loss": 0.8459, "step": 133480 }, { "epoch": 2.343615583138749, "grad_norm": 0.060995909593158024, "learning_rate": 0.00011406280738697541, "loss": 0.8442, "step": 133490 }, { "epoch": 2.3437911480187505, "grad_norm": 0.04991348597875841, "learning_rate": 0.00011405176110373088, "loss": 0.8389, "step": 133500 }, { "epoch": 2.3439667128987516, "grad_norm": 0.0634234182834563, "learning_rate": 0.00011404071465511801, "loss": 0.8503, "step": 133510 }, { "epoch": 2.344142277778753, "grad_norm": 0.060939383111456844, "learning_rate": 0.0001140296680412768, "loss": 0.8431, "step": 133520 }, { "epoch": 2.3443178426587545, "grad_norm": 0.04599197829761786, "learning_rate": 0.00011401862126234714, "loss": 0.8488, "step": 133530 }, { "epoch": 2.344493407538756, "grad_norm": 0.061431684835272025, "learning_rate": 0.0001140075743184691, "loss": 0.8486, "step": 133540 }, { "epoch": 2.3446689724187575, "grad_norm": 0.07871990985251791, "learning_rate": 0.0001139965272097826, "loss": 0.8443, "step": 133550 }, { "epoch": 2.3448445372987585, "grad_norm": 0.061352389774027676, "learning_rate": 0.00011398547993642765, "loss": 0.8441, "step": 133560 }, { "epoch": 2.34502010217876, "grad_norm": 0.054565289218371095, "learning_rate": 0.00011397443249854412, "loss": 0.8417, "step": 133570 }, { "epoch": 2.3451956670587615, "grad_norm": 0.0575526846466423, "learning_rate": 0.00011396338489627207, "loss": 0.851, "step": 133580 }, { "epoch": 2.345371231938763, "grad_norm": 0.0544901402390001, "learning_rate": 0.00011395233712975144, "loss": 0.8357, "step": 133590 }, { "epoch": 2.3455467968187644, "grad_norm": 0.07141727721092771, "learning_rate": 0.0001139412891991222, "loss": 0.8429, "step": 133600 }, { "epoch": 2.345722361698766, "grad_norm": 0.05651755053268674, "learning_rate": 0.00011393024110452437, "loss": 0.8479, "step": 133610 }, { "epoch": 2.3458979265787674, "grad_norm": 0.08793302104731628, "learning_rate": 0.00011391919284609791, "loss": 0.8415, "step": 133620 }, { "epoch": 2.3460734914587684, "grad_norm": 0.06297835490589034, "learning_rate": 0.0001139081444239828, "loss": 0.8411, "step": 133630 }, { "epoch": 2.34624905633877, "grad_norm": 0.05323587956893086, "learning_rate": 0.00011389709583831902, "loss": 0.8475, "step": 133640 }, { "epoch": 2.3464246212187714, "grad_norm": 0.06103952092462643, "learning_rate": 0.00011388604708924655, "loss": 0.8468, "step": 133650 }, { "epoch": 2.346600186098773, "grad_norm": 0.049469441562608706, "learning_rate": 0.00011387499817690542, "loss": 0.8448, "step": 133660 }, { "epoch": 2.3467757509787743, "grad_norm": 0.054330232568080956, "learning_rate": 0.0001138639491014356, "loss": 0.8488, "step": 133670 }, { "epoch": 2.3469513158587754, "grad_norm": 0.07424862133423164, "learning_rate": 0.00011385289986297707, "loss": 0.8508, "step": 133680 }, { "epoch": 2.347126880738777, "grad_norm": 0.07971854225167067, "learning_rate": 0.00011384185046166982, "loss": 0.8462, "step": 133690 }, { "epoch": 2.3473024456187783, "grad_norm": 0.0502736792146911, "learning_rate": 0.00011383080089765386, "loss": 0.8457, "step": 133700 }, { "epoch": 2.34747801049878, "grad_norm": 0.05815044357912154, "learning_rate": 0.0001138197511710692, "loss": 0.8389, "step": 133710 }, { "epoch": 2.3476535753787813, "grad_norm": 0.05750956747087427, "learning_rate": 0.00011380870128205583, "loss": 0.8481, "step": 133720 }, { "epoch": 2.3478291402587828, "grad_norm": 0.05447350098413739, "learning_rate": 0.0001137976512307538, "loss": 0.8397, "step": 133730 }, { "epoch": 2.3480047051387842, "grad_norm": 0.08051909154800427, "learning_rate": 0.00011378660101730305, "loss": 0.8453, "step": 133740 }, { "epoch": 2.3481802700187853, "grad_norm": 0.058172184245181495, "learning_rate": 0.00011377555064184358, "loss": 0.8482, "step": 133750 }, { "epoch": 2.3483558348987867, "grad_norm": 0.05350353354607648, "learning_rate": 0.00011376450010451547, "loss": 0.8491, "step": 133760 }, { "epoch": 2.348531399778788, "grad_norm": 0.04065558787295076, "learning_rate": 0.0001137534494054587, "loss": 0.8489, "step": 133770 }, { "epoch": 2.3487069646587897, "grad_norm": 0.07998306416919893, "learning_rate": 0.00011374239854481328, "loss": 0.8496, "step": 133780 }, { "epoch": 2.348882529538791, "grad_norm": 0.04725412985017815, "learning_rate": 0.00011373134752271923, "loss": 0.8482, "step": 133790 }, { "epoch": 2.3490580944187927, "grad_norm": 0.04589193171362352, "learning_rate": 0.00011372029633931656, "loss": 0.8436, "step": 133800 }, { "epoch": 2.3492336592987937, "grad_norm": 0.04564785441890648, "learning_rate": 0.0001137092449947453, "loss": 0.8502, "step": 133810 }, { "epoch": 2.349409224178795, "grad_norm": 0.053695174970113564, "learning_rate": 0.00011369819348914545, "loss": 0.8458, "step": 133820 }, { "epoch": 2.3495847890587966, "grad_norm": 0.047985305021954076, "learning_rate": 0.00011368714182265708, "loss": 0.8383, "step": 133830 }, { "epoch": 2.349760353938798, "grad_norm": 0.051069697973699035, "learning_rate": 0.00011367608999542019, "loss": 0.8564, "step": 133840 }, { "epoch": 2.3499359188187996, "grad_norm": 0.051280095325865055, "learning_rate": 0.00011366503800757482, "loss": 0.8386, "step": 133850 }, { "epoch": 2.350111483698801, "grad_norm": 0.05922752650071419, "learning_rate": 0.00011365398585926097, "loss": 0.8431, "step": 133860 }, { "epoch": 2.350287048578802, "grad_norm": 0.057142008276130286, "learning_rate": 0.0001136429335506187, "loss": 0.8385, "step": 133870 }, { "epoch": 2.3504626134588036, "grad_norm": 0.0701950070364163, "learning_rate": 0.00011363188108178804, "loss": 0.8516, "step": 133880 }, { "epoch": 2.350638178338805, "grad_norm": 0.04610639525322846, "learning_rate": 0.000113620828452909, "loss": 0.8421, "step": 133890 }, { "epoch": 2.3508137432188065, "grad_norm": 0.04922782959920052, "learning_rate": 0.0001136097756641217, "loss": 0.8418, "step": 133900 }, { "epoch": 2.350989308098808, "grad_norm": 0.050517958323559375, "learning_rate": 0.00011359872271556608, "loss": 0.847, "step": 133910 }, { "epoch": 2.3511648729788095, "grad_norm": 0.048691442953671664, "learning_rate": 0.00011358766960738223, "loss": 0.8453, "step": 133920 }, { "epoch": 2.351340437858811, "grad_norm": 0.06919537806019121, "learning_rate": 0.00011357661633971024, "loss": 0.8408, "step": 133930 }, { "epoch": 2.351516002738812, "grad_norm": 0.06366174153638317, "learning_rate": 0.00011356556291269007, "loss": 0.8375, "step": 133940 }, { "epoch": 2.3516915676188135, "grad_norm": 0.056672980579162195, "learning_rate": 0.00011355450932646182, "loss": 0.8399, "step": 133950 }, { "epoch": 2.351867132498815, "grad_norm": 0.06604085320127266, "learning_rate": 0.00011354345558116555, "loss": 0.8489, "step": 133960 }, { "epoch": 2.3520426973788164, "grad_norm": 0.05614710251027746, "learning_rate": 0.00011353240167694123, "loss": 0.8329, "step": 133970 }, { "epoch": 2.352218262258818, "grad_norm": 0.06348692754401203, "learning_rate": 0.00011352134761392906, "loss": 0.848, "step": 133980 }, { "epoch": 2.352393827138819, "grad_norm": 0.050907882333801, "learning_rate": 0.00011351029339226895, "loss": 0.8469, "step": 133990 }, { "epoch": 2.3525693920188204, "grad_norm": 0.05208965210778257, "learning_rate": 0.00011349923901210106, "loss": 0.8468, "step": 134000 }, { "epoch": 2.352744956898822, "grad_norm": 0.050262895089600974, "learning_rate": 0.00011348818447356542, "loss": 0.8454, "step": 134010 }, { "epoch": 2.3529205217788234, "grad_norm": 0.051559588390177775, "learning_rate": 0.0001134771297768021, "loss": 0.8444, "step": 134020 }, { "epoch": 2.353096086658825, "grad_norm": 0.052674868698447945, "learning_rate": 0.00011346607492195112, "loss": 0.8443, "step": 134030 }, { "epoch": 2.3532716515388263, "grad_norm": 0.06363751583000443, "learning_rate": 0.00011345501990915259, "loss": 0.8412, "step": 134040 }, { "epoch": 2.353447216418828, "grad_norm": 0.05299799458550487, "learning_rate": 0.00011344396473854657, "loss": 0.8431, "step": 134050 }, { "epoch": 2.353622781298829, "grad_norm": 0.049198275904631446, "learning_rate": 0.00011343290941027314, "loss": 0.8471, "step": 134060 }, { "epoch": 2.3537983461788303, "grad_norm": 0.057771010492266565, "learning_rate": 0.00011342185392447238, "loss": 0.8489, "step": 134070 }, { "epoch": 2.353973911058832, "grad_norm": 0.06354606721353578, "learning_rate": 0.00011341079828128431, "loss": 0.8375, "step": 134080 }, { "epoch": 2.3541494759388333, "grad_norm": 0.08993811277198738, "learning_rate": 0.00011339974248084906, "loss": 0.8437, "step": 134090 }, { "epoch": 2.3543250408188348, "grad_norm": 0.0424921774849628, "learning_rate": 0.00011338868652330672, "loss": 0.8483, "step": 134100 }, { "epoch": 2.354500605698836, "grad_norm": 0.05941425782265578, "learning_rate": 0.00011337763040879732, "loss": 0.8529, "step": 134110 }, { "epoch": 2.3546761705788373, "grad_norm": 0.0736690796796343, "learning_rate": 0.00011336657413746099, "loss": 0.8474, "step": 134120 }, { "epoch": 2.3548517354588387, "grad_norm": 0.06591040263933456, "learning_rate": 0.0001133555177094378, "loss": 0.8407, "step": 134130 }, { "epoch": 2.3550273003388402, "grad_norm": 0.05759538132838987, "learning_rate": 0.0001133444611248678, "loss": 0.8322, "step": 134140 }, { "epoch": 2.3552028652188417, "grad_norm": 0.06575002883440076, "learning_rate": 0.00011333340438389115, "loss": 0.8399, "step": 134150 }, { "epoch": 2.355378430098843, "grad_norm": 0.05478873016768162, "learning_rate": 0.00011332234748664789, "loss": 0.8486, "step": 134160 }, { "epoch": 2.3555539949788447, "grad_norm": 0.06005695129748368, "learning_rate": 0.00011331129043327815, "loss": 0.8516, "step": 134170 }, { "epoch": 2.3557295598588457, "grad_norm": 0.0798240069685387, "learning_rate": 0.00011330023322392198, "loss": 0.847, "step": 134180 }, { "epoch": 2.355905124738847, "grad_norm": 0.051078978658778106, "learning_rate": 0.00011328917585871948, "loss": 0.8437, "step": 134190 }, { "epoch": 2.3560806896188486, "grad_norm": 0.05103345377779297, "learning_rate": 0.00011327811833781079, "loss": 0.8481, "step": 134200 }, { "epoch": 2.35625625449885, "grad_norm": 0.07112212415029301, "learning_rate": 0.00011326706066133594, "loss": 0.8441, "step": 134210 }, { "epoch": 2.3564318193788516, "grad_norm": 0.0876135528747004, "learning_rate": 0.00011325600282943512, "loss": 0.8408, "step": 134220 }, { "epoch": 2.3566073842588526, "grad_norm": 0.059586193716567315, "learning_rate": 0.00011324494484224839, "loss": 0.8537, "step": 134230 }, { "epoch": 2.356782949138854, "grad_norm": 0.05213501446566046, "learning_rate": 0.0001132338866999159, "loss": 0.842, "step": 134240 }, { "epoch": 2.3569585140188556, "grad_norm": 0.053887887050827, "learning_rate": 0.00011322282840257768, "loss": 0.8479, "step": 134250 }, { "epoch": 2.357134078898857, "grad_norm": 0.05930323804927232, "learning_rate": 0.00011321176995037385, "loss": 0.8404, "step": 134260 }, { "epoch": 2.3573096437788585, "grad_norm": 0.0454905671134837, "learning_rate": 0.00011320071134344461, "loss": 0.8484, "step": 134270 }, { "epoch": 2.35748520865886, "grad_norm": 0.05985554956882661, "learning_rate": 0.00011318965258193, "loss": 0.8424, "step": 134280 }, { "epoch": 2.3576607735388615, "grad_norm": 0.07924237745192811, "learning_rate": 0.00011317859366597016, "loss": 0.837, "step": 134290 }, { "epoch": 2.3578363384188625, "grad_norm": 0.07205908809117192, "learning_rate": 0.0001131675345957052, "loss": 0.836, "step": 134300 }, { "epoch": 2.358011903298864, "grad_norm": 0.06187650068168415, "learning_rate": 0.00011315647537127523, "loss": 0.8416, "step": 134310 }, { "epoch": 2.3581874681788655, "grad_norm": 0.046592971578934994, "learning_rate": 0.00011314541599282042, "loss": 0.8436, "step": 134320 }, { "epoch": 2.358363033058867, "grad_norm": 0.06343894127762152, "learning_rate": 0.00011313435646048083, "loss": 0.8467, "step": 134330 }, { "epoch": 2.3585385979388684, "grad_norm": 0.056224469783179944, "learning_rate": 0.00011312329677439664, "loss": 0.8413, "step": 134340 }, { "epoch": 2.3587141628188695, "grad_norm": 0.05722784230457622, "learning_rate": 0.00011311223693470793, "loss": 0.8477, "step": 134350 }, { "epoch": 2.358889727698871, "grad_norm": 0.0662201563007302, "learning_rate": 0.00011310117694155491, "loss": 0.8407, "step": 134360 }, { "epoch": 2.3590652925788724, "grad_norm": 0.06773517247192046, "learning_rate": 0.00011309011679507761, "loss": 0.8427, "step": 134370 }, { "epoch": 2.359240857458874, "grad_norm": 0.06805427120856299, "learning_rate": 0.0001130790564954162, "loss": 0.8448, "step": 134380 }, { "epoch": 2.3594164223388754, "grad_norm": 0.05092751161665691, "learning_rate": 0.00011306799604271088, "loss": 0.847, "step": 134390 }, { "epoch": 2.359591987218877, "grad_norm": 0.061491711813521144, "learning_rate": 0.00011305693543710169, "loss": 0.8499, "step": 134400 }, { "epoch": 2.3597675520988783, "grad_norm": 0.04802451706881648, "learning_rate": 0.00011304587467872887, "loss": 0.8523, "step": 134410 }, { "epoch": 2.3599431169788794, "grad_norm": 0.042735942462558886, "learning_rate": 0.00011303481376773246, "loss": 0.8421, "step": 134420 }, { "epoch": 2.360118681858881, "grad_norm": 0.05639165736157412, "learning_rate": 0.00011302375270425268, "loss": 0.8458, "step": 134430 }, { "epoch": 2.3602942467388823, "grad_norm": 0.058342284284294735, "learning_rate": 0.00011301269148842966, "loss": 0.8475, "step": 134440 }, { "epoch": 2.360469811618884, "grad_norm": 0.05362668683256812, "learning_rate": 0.0001130016301204035, "loss": 0.8539, "step": 134450 }, { "epoch": 2.3606453764988853, "grad_norm": 0.05631504529116611, "learning_rate": 0.0001129905686003144, "loss": 0.8488, "step": 134460 }, { "epoch": 2.3608209413788868, "grad_norm": 0.08893655411120635, "learning_rate": 0.00011297950692830252, "loss": 0.841, "step": 134470 }, { "epoch": 2.360996506258888, "grad_norm": 0.04651328065396194, "learning_rate": 0.00011296844510450795, "loss": 0.8434, "step": 134480 }, { "epoch": 2.3611720711388893, "grad_norm": 0.07185462307379226, "learning_rate": 0.00011295738312907093, "loss": 0.8455, "step": 134490 }, { "epoch": 2.3613476360188907, "grad_norm": 0.07825435549185263, "learning_rate": 0.00011294632100213155, "loss": 0.845, "step": 134500 }, { "epoch": 2.3615232008988922, "grad_norm": 0.07329956798573353, "learning_rate": 0.00011293525872383, "loss": 0.835, "step": 134510 }, { "epoch": 2.3616987657788937, "grad_norm": 0.05449693927899409, "learning_rate": 0.00011292419629430644, "loss": 0.8414, "step": 134520 }, { "epoch": 2.361874330658895, "grad_norm": 0.05147985849243901, "learning_rate": 0.00011291313371370102, "loss": 0.8354, "step": 134530 }, { "epoch": 2.362049895538896, "grad_norm": 0.05542904436273784, "learning_rate": 0.00011290207098215391, "loss": 0.8484, "step": 134540 }, { "epoch": 2.3622254604188977, "grad_norm": 0.05972478654747157, "learning_rate": 0.00011289100809980528, "loss": 0.8407, "step": 134550 }, { "epoch": 2.362401025298899, "grad_norm": 0.04933241126890812, "learning_rate": 0.00011287994506679532, "loss": 0.8478, "step": 134560 }, { "epoch": 2.3625765901789006, "grad_norm": 0.058987268514871224, "learning_rate": 0.00011286888188326415, "loss": 0.8396, "step": 134570 }, { "epoch": 2.362752155058902, "grad_norm": 0.05704147932383239, "learning_rate": 0.000112857818549352, "loss": 0.8456, "step": 134580 }, { "epoch": 2.3629277199389036, "grad_norm": 0.0576600440389391, "learning_rate": 0.000112846755065199, "loss": 0.8495, "step": 134590 }, { "epoch": 2.3631032848189046, "grad_norm": 0.05295466996177986, "learning_rate": 0.00011283569143094532, "loss": 0.8495, "step": 134600 }, { "epoch": 2.363278849698906, "grad_norm": 0.056840243017410294, "learning_rate": 0.00011282462764673118, "loss": 0.844, "step": 134610 }, { "epoch": 2.3634544145789076, "grad_norm": 0.06014432289874687, "learning_rate": 0.00011281356371269674, "loss": 0.8445, "step": 134620 }, { "epoch": 2.363629979458909, "grad_norm": 0.06151625454873578, "learning_rate": 0.0001128024996289822, "loss": 0.8447, "step": 134630 }, { "epoch": 2.3638055443389105, "grad_norm": 0.04967207961854145, "learning_rate": 0.00011279143539572772, "loss": 0.8488, "step": 134640 }, { "epoch": 2.363981109218912, "grad_norm": 0.051862174218889154, "learning_rate": 0.00011278037101307345, "loss": 0.8375, "step": 134650 }, { "epoch": 2.364156674098913, "grad_norm": 0.054961183465586906, "learning_rate": 0.00011276930648115968, "loss": 0.8445, "step": 134660 }, { "epoch": 2.3643322389789145, "grad_norm": 0.056814842391382285, "learning_rate": 0.00011275824180012653, "loss": 0.8437, "step": 134670 }, { "epoch": 2.364507803858916, "grad_norm": 0.06028191984778268, "learning_rate": 0.00011274717697011419, "loss": 0.8416, "step": 134680 }, { "epoch": 2.3646833687389175, "grad_norm": 0.05017310789774405, "learning_rate": 0.00011273611199126289, "loss": 0.8461, "step": 134690 }, { "epoch": 2.364858933618919, "grad_norm": 0.047324711938525346, "learning_rate": 0.0001127250468637128, "loss": 0.8489, "step": 134700 }, { "epoch": 2.3650344984989204, "grad_norm": 0.04831003836280072, "learning_rate": 0.00011271398158760409, "loss": 0.845, "step": 134710 }, { "epoch": 2.365210063378922, "grad_norm": 0.055629309918760195, "learning_rate": 0.00011270291616307698, "loss": 0.847, "step": 134720 }, { "epoch": 2.365385628258923, "grad_norm": 0.043742760066158286, "learning_rate": 0.00011269185059027172, "loss": 0.8402, "step": 134730 }, { "epoch": 2.3655611931389244, "grad_norm": 0.04697661505185484, "learning_rate": 0.00011268078486932842, "loss": 0.8457, "step": 134740 }, { "epoch": 2.365736758018926, "grad_norm": 0.054346910638125356, "learning_rate": 0.00011266971900038743, "loss": 0.8418, "step": 134750 }, { "epoch": 2.3659123228989274, "grad_norm": 0.049495254821302605, "learning_rate": 0.00011265865298358878, "loss": 0.8369, "step": 134760 }, { "epoch": 2.366087887778929, "grad_norm": 0.046722739581489876, "learning_rate": 0.00011264758681907277, "loss": 0.8372, "step": 134770 }, { "epoch": 2.36626345265893, "grad_norm": 0.05772425001252415, "learning_rate": 0.00011263652050697964, "loss": 0.8446, "step": 134780 }, { "epoch": 2.3664390175389314, "grad_norm": 0.04685352141677186, "learning_rate": 0.00011262545404744953, "loss": 0.8463, "step": 134790 }, { "epoch": 2.366614582418933, "grad_norm": 0.05424722754100176, "learning_rate": 0.00011261438744062269, "loss": 0.8355, "step": 134800 }, { "epoch": 2.3667901472989343, "grad_norm": 0.059222465518485146, "learning_rate": 0.00011260332068663937, "loss": 0.8473, "step": 134810 }, { "epoch": 2.366965712178936, "grad_norm": 0.06532298023179649, "learning_rate": 0.00011259225378563972, "loss": 0.8491, "step": 134820 }, { "epoch": 2.3671412770589373, "grad_norm": 0.05633681457684546, "learning_rate": 0.00011258118673776402, "loss": 0.8434, "step": 134830 }, { "epoch": 2.3673168419389388, "grad_norm": 0.04725188109238191, "learning_rate": 0.00011257011954315244, "loss": 0.8493, "step": 134840 }, { "epoch": 2.36749240681894, "grad_norm": 0.066596828293837, "learning_rate": 0.00011255905220194524, "loss": 0.8387, "step": 134850 }, { "epoch": 2.3676679716989413, "grad_norm": 0.06594099624330622, "learning_rate": 0.00011254798471428263, "loss": 0.836, "step": 134860 }, { "epoch": 2.3678435365789428, "grad_norm": 0.0721137610254976, "learning_rate": 0.00011253691708030485, "loss": 0.8448, "step": 134870 }, { "epoch": 2.3680191014589442, "grad_norm": 0.044927891086740224, "learning_rate": 0.00011252584930015211, "loss": 0.8417, "step": 134880 }, { "epoch": 2.3681946663389457, "grad_norm": 0.06688511018046475, "learning_rate": 0.00011251478137396462, "loss": 0.844, "step": 134890 }, { "epoch": 2.3683702312189467, "grad_norm": 0.048054125548318526, "learning_rate": 0.00011250371330188269, "loss": 0.846, "step": 134900 }, { "epoch": 2.368545796098948, "grad_norm": 0.05024961818885813, "learning_rate": 0.00011249264508404649, "loss": 0.856, "step": 134910 }, { "epoch": 2.3687213609789497, "grad_norm": 0.051886287078572034, "learning_rate": 0.00011248157672059631, "loss": 0.8391, "step": 134920 }, { "epoch": 2.368896925858951, "grad_norm": 0.05240016887912636, "learning_rate": 0.00011247050821167229, "loss": 0.8393, "step": 134930 }, { "epoch": 2.3690724907389527, "grad_norm": 0.04852978780496837, "learning_rate": 0.00011245943955741476, "loss": 0.8503, "step": 134940 }, { "epoch": 2.369248055618954, "grad_norm": 0.0576291631114266, "learning_rate": 0.00011244837075796395, "loss": 0.8411, "step": 134950 }, { "epoch": 2.3694236204989556, "grad_norm": 0.04761097707247367, "learning_rate": 0.00011243730181346006, "loss": 0.8472, "step": 134960 }, { "epoch": 2.3695991853789566, "grad_norm": 0.06218803529343042, "learning_rate": 0.00011242623272404338, "loss": 0.8443, "step": 134970 }, { "epoch": 2.369774750258958, "grad_norm": 0.048111598485552166, "learning_rate": 0.00011241516348985414, "loss": 0.8471, "step": 134980 }, { "epoch": 2.3699503151389596, "grad_norm": 0.05231987940251522, "learning_rate": 0.00011240409411103261, "loss": 0.8514, "step": 134990 }, { "epoch": 2.370125880018961, "grad_norm": 0.04234706713217439, "learning_rate": 0.00011239302458771899, "loss": 0.8418, "step": 135000 }, { "epoch": 2.3703014448989626, "grad_norm": 0.05484713594735871, "learning_rate": 0.00011238195492005355, "loss": 0.8445, "step": 135010 }, { "epoch": 2.3704770097789636, "grad_norm": 0.0529803929181867, "learning_rate": 0.0001123708851081766, "loss": 0.8515, "step": 135020 }, { "epoch": 2.370652574658965, "grad_norm": 0.04823609707079143, "learning_rate": 0.00011235981515222835, "loss": 0.8441, "step": 135030 }, { "epoch": 2.3708281395389665, "grad_norm": 0.06279644760476553, "learning_rate": 0.00011234874505234907, "loss": 0.84, "step": 135040 }, { "epoch": 2.371003704418968, "grad_norm": 0.048406056834322544, "learning_rate": 0.000112337674808679, "loss": 0.847, "step": 135050 }, { "epoch": 2.3711792692989695, "grad_norm": 0.052854682366336836, "learning_rate": 0.0001123266044213584, "loss": 0.8446, "step": 135060 }, { "epoch": 2.371354834178971, "grad_norm": 0.05946573253358468, "learning_rate": 0.00011231553389052758, "loss": 0.8428, "step": 135070 }, { "epoch": 2.3715303990589724, "grad_norm": 0.05294775091356678, "learning_rate": 0.00011230446321632677, "loss": 0.8409, "step": 135080 }, { "epoch": 2.3717059639389735, "grad_norm": 0.05964013778116095, "learning_rate": 0.00011229339239889627, "loss": 0.8384, "step": 135090 }, { "epoch": 2.371881528818975, "grad_norm": 0.053903976505798666, "learning_rate": 0.00011228232143837629, "loss": 0.8419, "step": 135100 }, { "epoch": 2.3720570936989764, "grad_norm": 0.05012152712146011, "learning_rate": 0.00011227125033490712, "loss": 0.85, "step": 135110 }, { "epoch": 2.372232658578978, "grad_norm": 0.07881720507300939, "learning_rate": 0.0001122601790886291, "loss": 0.8629, "step": 135120 }, { "epoch": 2.3724082234589794, "grad_norm": 0.07840137925099194, "learning_rate": 0.0001122491076996824, "loss": 0.8453, "step": 135130 }, { "epoch": 2.3725837883389804, "grad_norm": 0.055860901533570526, "learning_rate": 0.0001122380361682074, "loss": 0.8386, "step": 135140 }, { "epoch": 2.372759353218982, "grad_norm": 0.05701449364322488, "learning_rate": 0.00011222696449434431, "loss": 0.8477, "step": 135150 }, { "epoch": 2.3729349180989834, "grad_norm": 0.0645531239355094, "learning_rate": 0.00011221589267823343, "loss": 0.8408, "step": 135160 }, { "epoch": 2.373110482978985, "grad_norm": 0.062226106757402366, "learning_rate": 0.00011220482072001502, "loss": 0.8418, "step": 135170 }, { "epoch": 2.3732860478589863, "grad_norm": 0.05243973295520538, "learning_rate": 0.00011219374861982936, "loss": 0.8433, "step": 135180 }, { "epoch": 2.373461612738988, "grad_norm": 0.048019556474398775, "learning_rate": 0.00011218267637781681, "loss": 0.8446, "step": 135190 }, { "epoch": 2.3736371776189893, "grad_norm": 0.05020999620157854, "learning_rate": 0.00011217160399411759, "loss": 0.8425, "step": 135200 }, { "epoch": 2.3738127424989903, "grad_norm": 0.07144757194152918, "learning_rate": 0.00011216053146887201, "loss": 0.8393, "step": 135210 }, { "epoch": 2.373988307378992, "grad_norm": 0.04287802012962726, "learning_rate": 0.00011214945880222036, "loss": 0.846, "step": 135220 }, { "epoch": 2.3741638722589933, "grad_norm": 0.052773430132874534, "learning_rate": 0.00011213838599430293, "loss": 0.8402, "step": 135230 }, { "epoch": 2.3743394371389948, "grad_norm": 0.06476892247770304, "learning_rate": 0.00011212731304526001, "loss": 0.8408, "step": 135240 }, { "epoch": 2.3745150020189962, "grad_norm": 0.05688639641141242, "learning_rate": 0.00011211623995523188, "loss": 0.8494, "step": 135250 }, { "epoch": 2.3746905668989977, "grad_norm": 0.05814186556294543, "learning_rate": 0.00011210516672435891, "loss": 0.8437, "step": 135260 }, { "epoch": 2.3748661317789987, "grad_norm": 0.05513625077471902, "learning_rate": 0.00011209409335278132, "loss": 0.8434, "step": 135270 }, { "epoch": 2.3750416966590002, "grad_norm": 0.05593403920596901, "learning_rate": 0.00011208301984063943, "loss": 0.8444, "step": 135280 }, { "epoch": 2.3752172615390017, "grad_norm": 0.05254619019419086, "learning_rate": 0.00011207194618807358, "loss": 0.84, "step": 135290 }, { "epoch": 2.375392826419003, "grad_norm": 0.07809043129805891, "learning_rate": 0.00011206087239522401, "loss": 0.8387, "step": 135300 }, { "epoch": 2.3755683912990047, "grad_norm": 0.058259908961249544, "learning_rate": 0.00011204979846223109, "loss": 0.8517, "step": 135310 }, { "epoch": 2.375743956179006, "grad_norm": 0.0556637799919183, "learning_rate": 0.00011203872438923513, "loss": 0.8436, "step": 135320 }, { "epoch": 2.375919521059007, "grad_norm": 0.08251639901100333, "learning_rate": 0.00011202765017637639, "loss": 0.8392, "step": 135330 }, { "epoch": 2.3760950859390086, "grad_norm": 0.06321009536591687, "learning_rate": 0.00011201657582379521, "loss": 0.8406, "step": 135340 }, { "epoch": 2.37627065081901, "grad_norm": 0.04797544349248897, "learning_rate": 0.00011200550133163188, "loss": 0.8468, "step": 135350 }, { "epoch": 2.3764462156990116, "grad_norm": 0.05270035445523443, "learning_rate": 0.00011199442670002678, "loss": 0.8386, "step": 135360 }, { "epoch": 2.376621780579013, "grad_norm": 0.06254608246911306, "learning_rate": 0.00011198335192912018, "loss": 0.8451, "step": 135370 }, { "epoch": 2.3767973454590146, "grad_norm": 0.057326655550731354, "learning_rate": 0.00011197227701905242, "loss": 0.8494, "step": 135380 }, { "epoch": 2.376972910339016, "grad_norm": 0.05723587240710077, "learning_rate": 0.00011196120196996376, "loss": 0.8498, "step": 135390 }, { "epoch": 2.377148475219017, "grad_norm": 0.04824233037035662, "learning_rate": 0.0001119501267819946, "loss": 0.8567, "step": 135400 }, { "epoch": 2.3773240400990185, "grad_norm": 0.04268248018070617, "learning_rate": 0.00011193905145528522, "loss": 0.8467, "step": 135410 }, { "epoch": 2.37749960497902, "grad_norm": 0.05245071595333195, "learning_rate": 0.00011192797598997597, "loss": 0.8478, "step": 135420 }, { "epoch": 2.3776751698590215, "grad_norm": 0.05590186334180159, "learning_rate": 0.00011191690038620719, "loss": 0.8406, "step": 135430 }, { "epoch": 2.377850734739023, "grad_norm": 0.05133072927163954, "learning_rate": 0.00011190582464411918, "loss": 0.8508, "step": 135440 }, { "epoch": 2.378026299619024, "grad_norm": 0.06758040666062896, "learning_rate": 0.00011189474876385227, "loss": 0.8518, "step": 135450 }, { "epoch": 2.3782018644990255, "grad_norm": 0.059446773083962824, "learning_rate": 0.00011188367274554681, "loss": 0.8444, "step": 135460 }, { "epoch": 2.378377429379027, "grad_norm": 0.05569368865105178, "learning_rate": 0.0001118725965893431, "loss": 0.8512, "step": 135470 }, { "epoch": 2.3785529942590284, "grad_norm": 0.08273207947771312, "learning_rate": 0.00011186152029538157, "loss": 0.8423, "step": 135480 }, { "epoch": 2.37872855913903, "grad_norm": 0.056110009326340936, "learning_rate": 0.00011185044386380245, "loss": 0.848, "step": 135490 }, { "epoch": 2.3789041240190314, "grad_norm": 0.04226877869068197, "learning_rate": 0.00011183936729474616, "loss": 0.8465, "step": 135500 }, { "epoch": 2.379079688899033, "grad_norm": 0.10686022403861829, "learning_rate": 0.00011182829058835298, "loss": 0.8414, "step": 135510 }, { "epoch": 2.379255253779034, "grad_norm": 0.05801992601437741, "learning_rate": 0.00011181721374476328, "loss": 0.8434, "step": 135520 }, { "epoch": 2.3794308186590354, "grad_norm": 0.06683512978949109, "learning_rate": 0.00011180613676411743, "loss": 0.8428, "step": 135530 }, { "epoch": 2.379606383539037, "grad_norm": 0.04794793299597641, "learning_rate": 0.00011179505964655577, "loss": 0.8475, "step": 135540 }, { "epoch": 2.3797819484190383, "grad_norm": 0.045299745619026485, "learning_rate": 0.00011178398239221861, "loss": 0.8518, "step": 135550 }, { "epoch": 2.37995751329904, "grad_norm": 0.045930076065542856, "learning_rate": 0.0001117729050012463, "loss": 0.8417, "step": 135560 }, { "epoch": 2.380133078179041, "grad_norm": 0.058636647618744604, "learning_rate": 0.00011176182747377925, "loss": 0.8395, "step": 135570 }, { "epoch": 2.3803086430590423, "grad_norm": 0.05825808180266637, "learning_rate": 0.0001117507498099578, "loss": 0.8377, "step": 135580 }, { "epoch": 2.380484207939044, "grad_norm": 0.060830254278643664, "learning_rate": 0.00011173967200992226, "loss": 0.8347, "step": 135590 }, { "epoch": 2.3806597728190453, "grad_norm": 0.05488064821773971, "learning_rate": 0.00011172859407381304, "loss": 0.8405, "step": 135600 }, { "epoch": 2.3808353376990468, "grad_norm": 0.06182569303487677, "learning_rate": 0.00011171751600177045, "loss": 0.8393, "step": 135610 }, { "epoch": 2.3810109025790482, "grad_norm": 0.04704812327882085, "learning_rate": 0.0001117064377939349, "loss": 0.8361, "step": 135620 }, { "epoch": 2.3811864674590497, "grad_norm": 0.04768114592647806, "learning_rate": 0.00011169535945044674, "loss": 0.8432, "step": 135630 }, { "epoch": 2.3813620323390507, "grad_norm": 0.062186801231298744, "learning_rate": 0.00011168428097144627, "loss": 0.8367, "step": 135640 }, { "epoch": 2.3815375972190522, "grad_norm": 0.05183491182797227, "learning_rate": 0.00011167320235707397, "loss": 0.8486, "step": 135650 }, { "epoch": 2.3817131620990537, "grad_norm": 0.05074625180052012, "learning_rate": 0.00011166212360747015, "loss": 0.8458, "step": 135660 }, { "epoch": 2.381888726979055, "grad_norm": 0.052918597370422996, "learning_rate": 0.00011165104472277518, "loss": 0.8411, "step": 135670 }, { "epoch": 2.3820642918590567, "grad_norm": 0.05108737378861503, "learning_rate": 0.0001116399657031294, "loss": 0.8461, "step": 135680 }, { "epoch": 2.3822398567390577, "grad_norm": 0.06748402102538224, "learning_rate": 0.00011162888654867323, "loss": 0.8439, "step": 135690 }, { "epoch": 2.382415421619059, "grad_norm": 0.04730311252440297, "learning_rate": 0.00011161780725954702, "loss": 0.8402, "step": 135700 }, { "epoch": 2.3825909864990606, "grad_norm": 0.0549193461447507, "learning_rate": 0.0001116067278358912, "loss": 0.8391, "step": 135710 }, { "epoch": 2.382766551379062, "grad_norm": 0.06783967411406316, "learning_rate": 0.00011159564827784608, "loss": 0.839, "step": 135720 }, { "epoch": 2.3829421162590636, "grad_norm": 0.06506584828169347, "learning_rate": 0.00011158456858555203, "loss": 0.8452, "step": 135730 }, { "epoch": 2.383117681139065, "grad_norm": 0.045019606675359945, "learning_rate": 0.00011157348875914948, "loss": 0.8471, "step": 135740 }, { "epoch": 2.3832932460190666, "grad_norm": 0.06654761031301956, "learning_rate": 0.00011156240879877885, "loss": 0.8398, "step": 135750 }, { "epoch": 2.3834688108990676, "grad_norm": 0.0631949990482006, "learning_rate": 0.0001115513287045804, "loss": 0.8485, "step": 135760 }, { "epoch": 2.383644375779069, "grad_norm": 0.05887973492961591, "learning_rate": 0.00011154024847669468, "loss": 0.8446, "step": 135770 }, { "epoch": 2.3838199406590705, "grad_norm": 0.06352633661408974, "learning_rate": 0.00011152916811526195, "loss": 0.8465, "step": 135780 }, { "epoch": 2.383995505539072, "grad_norm": 0.053464935753489644, "learning_rate": 0.00011151808762042262, "loss": 0.8491, "step": 135790 }, { "epoch": 2.3841710704190735, "grad_norm": 0.045295872018850596, "learning_rate": 0.00011150700699231713, "loss": 0.8472, "step": 135800 }, { "epoch": 2.3843466352990745, "grad_norm": 0.0570254518342167, "learning_rate": 0.00011149592623108585, "loss": 0.8431, "step": 135810 }, { "epoch": 2.384522200179076, "grad_norm": 0.056985715540532965, "learning_rate": 0.00011148484533686918, "loss": 0.843, "step": 135820 }, { "epoch": 2.3846977650590775, "grad_norm": 0.043382556958506735, "learning_rate": 0.0001114737643098075, "loss": 0.8473, "step": 135830 }, { "epoch": 2.384873329939079, "grad_norm": 0.0658055527439204, "learning_rate": 0.00011146268315004123, "loss": 0.8363, "step": 135840 }, { "epoch": 2.3850488948190804, "grad_norm": 0.059145224660092956, "learning_rate": 0.00011145160185771071, "loss": 0.8435, "step": 135850 }, { "epoch": 2.385224459699082, "grad_norm": 0.05485898769627161, "learning_rate": 0.00011144052043295643, "loss": 0.8442, "step": 135860 }, { "epoch": 2.3854000245790834, "grad_norm": 0.049091506410229184, "learning_rate": 0.00011142943887591877, "loss": 0.8484, "step": 135870 }, { "epoch": 2.3855755894590844, "grad_norm": 0.051303940117973915, "learning_rate": 0.00011141835718673812, "loss": 0.8487, "step": 135880 }, { "epoch": 2.385751154339086, "grad_norm": 0.06542643507614472, "learning_rate": 0.00011140727536555486, "loss": 0.8506, "step": 135890 }, { "epoch": 2.3859267192190874, "grad_norm": 0.053485592418217466, "learning_rate": 0.00011139619341250943, "loss": 0.8445, "step": 135900 }, { "epoch": 2.386102284099089, "grad_norm": 0.058424231912986, "learning_rate": 0.00011138511132774224, "loss": 0.8483, "step": 135910 }, { "epoch": 2.3862778489790903, "grad_norm": 0.053669615526938524, "learning_rate": 0.0001113740291113937, "loss": 0.8452, "step": 135920 }, { "epoch": 2.386453413859092, "grad_norm": 0.04823612403572056, "learning_rate": 0.00011136294676360421, "loss": 0.8461, "step": 135930 }, { "epoch": 2.386628978739093, "grad_norm": 0.04823412421703006, "learning_rate": 0.00011135186428451425, "loss": 0.8423, "step": 135940 }, { "epoch": 2.3868045436190943, "grad_norm": 0.04271457906703056, "learning_rate": 0.00011134078167426414, "loss": 0.8517, "step": 135950 }, { "epoch": 2.386980108499096, "grad_norm": 0.07619285718305875, "learning_rate": 0.00011132969893299434, "loss": 0.8392, "step": 135960 }, { "epoch": 2.3871556733790973, "grad_norm": 0.0553269336021089, "learning_rate": 0.0001113186160608453, "loss": 0.8503, "step": 135970 }, { "epoch": 2.3873312382590988, "grad_norm": 0.044262629778197775, "learning_rate": 0.0001113075330579574, "loss": 0.8413, "step": 135980 }, { "epoch": 2.3875068031391002, "grad_norm": 0.06209766054214853, "learning_rate": 0.00011129644992447108, "loss": 0.8404, "step": 135990 }, { "epoch": 2.3876823680191013, "grad_norm": 0.059659781265969734, "learning_rate": 0.0001112853666605268, "loss": 0.853, "step": 136000 }, { "epoch": 2.3878579328991028, "grad_norm": 0.07467084666959009, "learning_rate": 0.00011127428326626493, "loss": 0.8433, "step": 136010 }, { "epoch": 2.3880334977791042, "grad_norm": 0.04663758757175809, "learning_rate": 0.00011126319974182593, "loss": 0.8481, "step": 136020 }, { "epoch": 2.3882090626591057, "grad_norm": 0.052008819930882945, "learning_rate": 0.00011125211608735018, "loss": 0.8379, "step": 136030 }, { "epoch": 2.388384627539107, "grad_norm": 0.05452832214030298, "learning_rate": 0.00011124103230297818, "loss": 0.8384, "step": 136040 }, { "epoch": 2.3885601924191087, "grad_norm": 0.0448278083790518, "learning_rate": 0.00011122994838885036, "loss": 0.8456, "step": 136050 }, { "epoch": 2.3887357572991097, "grad_norm": 0.05311398625917719, "learning_rate": 0.00011121886434510713, "loss": 0.8386, "step": 136060 }, { "epoch": 2.388911322179111, "grad_norm": 0.06192246025170192, "learning_rate": 0.0001112077801718889, "loss": 0.8437, "step": 136070 }, { "epoch": 2.3890868870591127, "grad_norm": 0.051960664908592885, "learning_rate": 0.00011119669586933616, "loss": 0.8475, "step": 136080 }, { "epoch": 2.389262451939114, "grad_norm": 0.06887317870558848, "learning_rate": 0.00011118561143758932, "loss": 0.8545, "step": 136090 }, { "epoch": 2.3894380168191156, "grad_norm": 0.044313156588725824, "learning_rate": 0.00011117452687678882, "loss": 0.851, "step": 136100 }, { "epoch": 2.389613581699117, "grad_norm": 0.060814525457578116, "learning_rate": 0.00011116344218707517, "loss": 0.8474, "step": 136110 }, { "epoch": 2.389789146579118, "grad_norm": 0.059579182743328976, "learning_rate": 0.00011115235736858871, "loss": 0.8345, "step": 136120 }, { "epoch": 2.3899647114591196, "grad_norm": 0.04434285349498977, "learning_rate": 0.00011114127242146994, "loss": 0.8486, "step": 136130 }, { "epoch": 2.390140276339121, "grad_norm": 0.07230766594797622, "learning_rate": 0.00011113018734585931, "loss": 0.8511, "step": 136140 }, { "epoch": 2.3903158412191226, "grad_norm": 0.05463536055933053, "learning_rate": 0.00011111910214189727, "loss": 0.8506, "step": 136150 }, { "epoch": 2.390491406099124, "grad_norm": 0.07738893505539414, "learning_rate": 0.00011110801680972427, "loss": 0.8403, "step": 136160 }, { "epoch": 2.3906669709791255, "grad_norm": 0.07178219212596966, "learning_rate": 0.00011109693134948075, "loss": 0.8433, "step": 136170 }, { "epoch": 2.390842535859127, "grad_norm": 0.0635399788298119, "learning_rate": 0.00011108584576130718, "loss": 0.8476, "step": 136180 }, { "epoch": 2.391018100739128, "grad_norm": 0.06487786321358693, "learning_rate": 0.000111074760045344, "loss": 0.8382, "step": 136190 }, { "epoch": 2.3911936656191295, "grad_norm": 0.06584051477250506, "learning_rate": 0.00011106367420173166, "loss": 0.8413, "step": 136200 }, { "epoch": 2.391369230499131, "grad_norm": 0.08865838107384084, "learning_rate": 0.00011105258823061068, "loss": 0.8425, "step": 136210 }, { "epoch": 2.3915447953791324, "grad_norm": 0.05771633330524254, "learning_rate": 0.00011104150213212148, "loss": 0.84, "step": 136220 }, { "epoch": 2.391720360259134, "grad_norm": 0.04834714508519739, "learning_rate": 0.00011103041590640454, "loss": 0.8421, "step": 136230 }, { "epoch": 2.391895925139135, "grad_norm": 0.05836448116206925, "learning_rate": 0.00011101932955360026, "loss": 0.8503, "step": 136240 }, { "epoch": 2.3920714900191364, "grad_norm": 0.06738060086953708, "learning_rate": 0.00011100824307384917, "loss": 0.8372, "step": 136250 }, { "epoch": 2.392247054899138, "grad_norm": 0.04850017174378084, "learning_rate": 0.00011099715646729174, "loss": 0.8434, "step": 136260 }, { "epoch": 2.3924226197791394, "grad_norm": 0.052590387894497596, "learning_rate": 0.00011098606973406837, "loss": 0.8451, "step": 136270 }, { "epoch": 2.392598184659141, "grad_norm": 0.05616939755149267, "learning_rate": 0.00011097498287431966, "loss": 0.8412, "step": 136280 }, { "epoch": 2.3927737495391423, "grad_norm": 0.052127272722998705, "learning_rate": 0.00011096389588818597, "loss": 0.848, "step": 136290 }, { "epoch": 2.392949314419144, "grad_norm": 0.050196173911595164, "learning_rate": 0.00011095280877580778, "loss": 0.838, "step": 136300 }, { "epoch": 2.393124879299145, "grad_norm": 0.049077072351219775, "learning_rate": 0.00011094172153732565, "loss": 0.8453, "step": 136310 }, { "epoch": 2.3933004441791463, "grad_norm": 0.04655105464177969, "learning_rate": 0.00011093063417287994, "loss": 0.8477, "step": 136320 }, { "epoch": 2.393476009059148, "grad_norm": 0.0550153746685437, "learning_rate": 0.00011091954668261125, "loss": 0.8439, "step": 136330 }, { "epoch": 2.3936515739391493, "grad_norm": 0.06542052059717275, "learning_rate": 0.00011090845906665999, "loss": 0.8405, "step": 136340 }, { "epoch": 2.3938271388191508, "grad_norm": 0.042702515160168514, "learning_rate": 0.00011089737132516664, "loss": 0.8431, "step": 136350 }, { "epoch": 2.394002703699152, "grad_norm": 0.056007794907006254, "learning_rate": 0.00011088628345827167, "loss": 0.8496, "step": 136360 }, { "epoch": 2.3941782685791533, "grad_norm": 0.05551365976452953, "learning_rate": 0.00011087519546611565, "loss": 0.8398, "step": 136370 }, { "epoch": 2.3943538334591548, "grad_norm": 0.057603942189245784, "learning_rate": 0.00011086410734883898, "loss": 0.8448, "step": 136380 }, { "epoch": 2.3945293983391562, "grad_norm": 0.06844730949506629, "learning_rate": 0.00011085301910658219, "loss": 0.8438, "step": 136390 }, { "epoch": 2.3947049632191577, "grad_norm": 0.057142422192387195, "learning_rate": 0.00011084193073948577, "loss": 0.8457, "step": 136400 }, { "epoch": 2.394880528099159, "grad_norm": 0.07681417903834677, "learning_rate": 0.00011083084224769016, "loss": 0.8454, "step": 136410 }, { "epoch": 2.3950560929791607, "grad_norm": 0.055013037926149415, "learning_rate": 0.00011081975363133592, "loss": 0.8531, "step": 136420 }, { "epoch": 2.3952316578591617, "grad_norm": 0.04655535647637035, "learning_rate": 0.00011080866489056355, "loss": 0.8456, "step": 136430 }, { "epoch": 2.395407222739163, "grad_norm": 0.05021056972942238, "learning_rate": 0.00011079757602551344, "loss": 0.8415, "step": 136440 }, { "epoch": 2.3955827876191647, "grad_norm": 0.057315173659868116, "learning_rate": 0.00011078648703632623, "loss": 0.8431, "step": 136450 }, { "epoch": 2.395758352499166, "grad_norm": 0.05382027167189947, "learning_rate": 0.00011077539792314234, "loss": 0.8457, "step": 136460 }, { "epoch": 2.3959339173791676, "grad_norm": 0.061850942873844406, "learning_rate": 0.00011076430868610229, "loss": 0.842, "step": 136470 }, { "epoch": 2.3961094822591686, "grad_norm": 0.06566839080598091, "learning_rate": 0.00011075321932534654, "loss": 0.8482, "step": 136480 }, { "epoch": 2.39628504713917, "grad_norm": 0.06967714841597666, "learning_rate": 0.00011074212984101566, "loss": 0.8462, "step": 136490 }, { "epoch": 2.3964606120191716, "grad_norm": 0.04730992084410836, "learning_rate": 0.00011073104023325014, "loss": 0.84, "step": 136500 }, { "epoch": 2.396636176899173, "grad_norm": 0.06655770404838825, "learning_rate": 0.00011071995050219049, "loss": 0.8481, "step": 136510 }, { "epoch": 2.3968117417791746, "grad_norm": 0.05233028794403445, "learning_rate": 0.00011070886064797716, "loss": 0.8446, "step": 136520 }, { "epoch": 2.396987306659176, "grad_norm": 0.049101676228709146, "learning_rate": 0.00011069777067075071, "loss": 0.8444, "step": 136530 }, { "epoch": 2.3971628715391775, "grad_norm": 0.05727689898736926, "learning_rate": 0.00011068668057065164, "loss": 0.8436, "step": 136540 }, { "epoch": 2.3973384364191785, "grad_norm": 0.05926658296925544, "learning_rate": 0.00011067559034782051, "loss": 0.8461, "step": 136550 }, { "epoch": 2.39751400129918, "grad_norm": 0.052479974373013355, "learning_rate": 0.00011066450000239778, "loss": 0.8403, "step": 136560 }, { "epoch": 2.3976895661791815, "grad_norm": 0.05070836701874276, "learning_rate": 0.000110653409534524, "loss": 0.8536, "step": 136570 }, { "epoch": 2.397865131059183, "grad_norm": 0.047357121372627334, "learning_rate": 0.00011064231894433962, "loss": 0.8421, "step": 136580 }, { "epoch": 2.3980406959391845, "grad_norm": 0.05650538826352159, "learning_rate": 0.00011063122823198524, "loss": 0.8525, "step": 136590 }, { "epoch": 2.3982162608191855, "grad_norm": 0.05562685215665266, "learning_rate": 0.00011062013739760135, "loss": 0.8467, "step": 136600 }, { "epoch": 2.398391825699187, "grad_norm": 0.04480081302900883, "learning_rate": 0.00011060904644132846, "loss": 0.8424, "step": 136610 }, { "epoch": 2.3985673905791884, "grad_norm": 0.05459924163804141, "learning_rate": 0.00011059795536330717, "loss": 0.8426, "step": 136620 }, { "epoch": 2.39874295545919, "grad_norm": 0.04906143111538187, "learning_rate": 0.0001105868641636779, "loss": 0.8419, "step": 136630 }, { "epoch": 2.3989185203391914, "grad_norm": 0.06382727159224229, "learning_rate": 0.00011057577284258121, "loss": 0.8437, "step": 136640 }, { "epoch": 2.399094085219193, "grad_norm": 0.08068809661955471, "learning_rate": 0.00011056468140015767, "loss": 0.8451, "step": 136650 }, { "epoch": 2.3992696500991944, "grad_norm": 0.06978202492305034, "learning_rate": 0.00011055358983654775, "loss": 0.8428, "step": 136660 }, { "epoch": 2.3994452149791954, "grad_norm": 0.0510512904117097, "learning_rate": 0.00011054249815189204, "loss": 0.8381, "step": 136670 }, { "epoch": 2.399620779859197, "grad_norm": 0.06641707652888178, "learning_rate": 0.00011053140634633106, "loss": 0.8464, "step": 136680 }, { "epoch": 2.3997963447391983, "grad_norm": 0.058825084289296774, "learning_rate": 0.00011052031442000535, "loss": 0.8384, "step": 136690 }, { "epoch": 2.3999719096192, "grad_norm": 0.04751182454661389, "learning_rate": 0.00011050922237305539, "loss": 0.8499, "step": 136700 }, { "epoch": 2.4001474744992013, "grad_norm": 0.06659850094424706, "learning_rate": 0.00011049813020562177, "loss": 0.8462, "step": 136710 }, { "epoch": 2.4003230393792028, "grad_norm": 0.06361596328010449, "learning_rate": 0.00011048703791784503, "loss": 0.8456, "step": 136720 }, { "epoch": 2.400498604259204, "grad_norm": 0.05486384844260283, "learning_rate": 0.00011047594550986569, "loss": 0.8447, "step": 136730 }, { "epoch": 2.4006741691392053, "grad_norm": 0.05158121708528834, "learning_rate": 0.00011046485298182434, "loss": 0.8477, "step": 136740 }, { "epoch": 2.4008497340192068, "grad_norm": 0.05135555383065123, "learning_rate": 0.00011045376033386143, "loss": 0.8487, "step": 136750 }, { "epoch": 2.4010252988992082, "grad_norm": 0.06497606685303159, "learning_rate": 0.00011044266756611759, "loss": 0.8455, "step": 136760 }, { "epoch": 2.4012008637792097, "grad_norm": 0.060536165795421475, "learning_rate": 0.00011043157467873335, "loss": 0.8455, "step": 136770 }, { "epoch": 2.401376428659211, "grad_norm": 0.0625986700016328, "learning_rate": 0.00011042048167184925, "loss": 0.8492, "step": 136780 }, { "epoch": 2.4015519935392122, "grad_norm": 0.057870068424229155, "learning_rate": 0.00011040938854560585, "loss": 0.8462, "step": 136790 }, { "epoch": 2.4017275584192137, "grad_norm": 0.05418768954727669, "learning_rate": 0.0001103982953001437, "loss": 0.8471, "step": 136800 }, { "epoch": 2.401903123299215, "grad_norm": 0.06929949734898712, "learning_rate": 0.00011038720193560333, "loss": 0.8445, "step": 136810 }, { "epoch": 2.4020786881792167, "grad_norm": 0.05682090029782749, "learning_rate": 0.00011037610845212529, "loss": 0.8443, "step": 136820 }, { "epoch": 2.402254253059218, "grad_norm": 0.0856499150002833, "learning_rate": 0.0001103650148498502, "loss": 0.843, "step": 136830 }, { "epoch": 2.4024298179392196, "grad_norm": 0.05161714846057119, "learning_rate": 0.00011035392112891856, "loss": 0.8421, "step": 136840 }, { "epoch": 2.402605382819221, "grad_norm": 0.05586956691972077, "learning_rate": 0.00011034282728947095, "loss": 0.8388, "step": 136850 }, { "epoch": 2.402780947699222, "grad_norm": 0.04606846314487125, "learning_rate": 0.00011033173333164793, "loss": 0.8412, "step": 136860 }, { "epoch": 2.4029565125792236, "grad_norm": 0.04792118772978804, "learning_rate": 0.00011032063925559005, "loss": 0.8432, "step": 136870 }, { "epoch": 2.403132077459225, "grad_norm": 0.05059961272350703, "learning_rate": 0.00011030954506143787, "loss": 0.8373, "step": 136880 }, { "epoch": 2.4033076423392266, "grad_norm": 0.06662947613853239, "learning_rate": 0.000110298450749332, "loss": 0.8482, "step": 136890 }, { "epoch": 2.403483207219228, "grad_norm": 0.055967622939257604, "learning_rate": 0.00011028735631941298, "loss": 0.8401, "step": 136900 }, { "epoch": 2.403658772099229, "grad_norm": 0.06100004621575844, "learning_rate": 0.00011027626177182135, "loss": 0.8397, "step": 136910 }, { "epoch": 2.4038343369792305, "grad_norm": 0.04852207301499156, "learning_rate": 0.00011026516710669772, "loss": 0.837, "step": 136920 }, { "epoch": 2.404009901859232, "grad_norm": 0.050956225089374495, "learning_rate": 0.00011025407232418263, "loss": 0.8427, "step": 136930 }, { "epoch": 2.4041854667392335, "grad_norm": 0.05102746613803304, "learning_rate": 0.00011024297742441669, "loss": 0.8535, "step": 136940 }, { "epoch": 2.404361031619235, "grad_norm": 0.06290671038040319, "learning_rate": 0.00011023188240754045, "loss": 0.8421, "step": 136950 }, { "epoch": 2.4045365964992365, "grad_norm": 0.07390847516694621, "learning_rate": 0.00011022078727369452, "loss": 0.8494, "step": 136960 }, { "epoch": 2.404712161379238, "grad_norm": 0.06913948286751802, "learning_rate": 0.00011020969202301939, "loss": 0.8406, "step": 136970 }, { "epoch": 2.404887726259239, "grad_norm": 0.049642557094404675, "learning_rate": 0.0001101985966556557, "loss": 0.8326, "step": 136980 }, { "epoch": 2.4050632911392404, "grad_norm": 0.04764981681435323, "learning_rate": 0.00011018750117174407, "loss": 0.8456, "step": 136990 }, { "epoch": 2.405238856019242, "grad_norm": 0.06158291288151327, "learning_rate": 0.00011017640557142503, "loss": 0.8417, "step": 137000 }, { "epoch": 2.4054144208992434, "grad_norm": 0.0583774222476529, "learning_rate": 0.00011016530985483917, "loss": 0.8401, "step": 137010 }, { "epoch": 2.405589985779245, "grad_norm": 0.05400741413797872, "learning_rate": 0.00011015421402212709, "loss": 0.8469, "step": 137020 }, { "epoch": 2.405765550659246, "grad_norm": 0.06169430776024137, "learning_rate": 0.00011014311807342934, "loss": 0.8431, "step": 137030 }, { "epoch": 2.4059411155392474, "grad_norm": 0.05558256825288102, "learning_rate": 0.00011013202200888653, "loss": 0.8416, "step": 137040 }, { "epoch": 2.406116680419249, "grad_norm": 0.06164951664257011, "learning_rate": 0.00011012092582863924, "loss": 0.8433, "step": 137050 }, { "epoch": 2.4062922452992503, "grad_norm": 0.05740389578206979, "learning_rate": 0.00011010982953282809, "loss": 0.848, "step": 137060 }, { "epoch": 2.406467810179252, "grad_norm": 0.08114196919274737, "learning_rate": 0.0001100987331215937, "loss": 0.8404, "step": 137070 }, { "epoch": 2.4066433750592533, "grad_norm": 0.04513810983746019, "learning_rate": 0.00011008763659507654, "loss": 0.8491, "step": 137080 }, { "epoch": 2.4068189399392548, "grad_norm": 0.04909311888729204, "learning_rate": 0.00011007653995341731, "loss": 0.8514, "step": 137090 }, { "epoch": 2.406994504819256, "grad_norm": 0.053037940150736464, "learning_rate": 0.00011006544319675658, "loss": 0.8466, "step": 137100 }, { "epoch": 2.4071700696992573, "grad_norm": 0.06908631284091632, "learning_rate": 0.00011005434632523497, "loss": 0.8396, "step": 137110 }, { "epoch": 2.4073456345792588, "grad_norm": 0.05029290765132417, "learning_rate": 0.00011004324933899303, "loss": 0.8371, "step": 137120 }, { "epoch": 2.4075211994592602, "grad_norm": 0.059560613012841816, "learning_rate": 0.0001100321522381714, "loss": 0.8418, "step": 137130 }, { "epoch": 2.4076967643392617, "grad_norm": 0.08025014727274779, "learning_rate": 0.00011002105502291066, "loss": 0.8437, "step": 137140 }, { "epoch": 2.4078723292192628, "grad_norm": 0.050290833733350054, "learning_rate": 0.00011000995769335143, "loss": 0.8527, "step": 137150 }, { "epoch": 2.4080478940992642, "grad_norm": 0.0657409471740963, "learning_rate": 0.0001099988602496343, "loss": 0.836, "step": 137160 }, { "epoch": 2.4082234589792657, "grad_norm": 0.057643231650381994, "learning_rate": 0.0001099877626918999, "loss": 0.8474, "step": 137170 }, { "epoch": 2.408399023859267, "grad_norm": 0.06394960148228845, "learning_rate": 0.00010997666502028883, "loss": 0.8432, "step": 137180 }, { "epoch": 2.4085745887392687, "grad_norm": 0.08028610732149835, "learning_rate": 0.00010996556723494167, "loss": 0.8463, "step": 137190 }, { "epoch": 2.40875015361927, "grad_norm": 0.06624523070738446, "learning_rate": 0.00010995446933599911, "loss": 0.8479, "step": 137200 }, { "epoch": 2.4089257184992716, "grad_norm": 0.06170650310212568, "learning_rate": 0.00010994337132360164, "loss": 0.844, "step": 137210 }, { "epoch": 2.4091012833792727, "grad_norm": 0.056590215936247516, "learning_rate": 0.00010993227319788995, "loss": 0.8536, "step": 137220 }, { "epoch": 2.409276848259274, "grad_norm": 0.051927689179226515, "learning_rate": 0.00010992117495900467, "loss": 0.8523, "step": 137230 }, { "epoch": 2.4094524131392756, "grad_norm": 0.06743026950594895, "learning_rate": 0.0001099100766070864, "loss": 0.8429, "step": 137240 }, { "epoch": 2.409627978019277, "grad_norm": 0.06240069741760646, "learning_rate": 0.00010989897814227574, "loss": 0.8503, "step": 137250 }, { "epoch": 2.4098035428992786, "grad_norm": 0.05026155525266967, "learning_rate": 0.00010988787956471331, "loss": 0.8404, "step": 137260 }, { "epoch": 2.4099791077792796, "grad_norm": 0.05382389672092984, "learning_rate": 0.00010987678087453974, "loss": 0.8353, "step": 137270 }, { "epoch": 2.410154672659281, "grad_norm": 0.08530975365628353, "learning_rate": 0.00010986568207189567, "loss": 0.8408, "step": 137280 }, { "epoch": 2.4103302375392825, "grad_norm": 0.05552527509181815, "learning_rate": 0.00010985458315692171, "loss": 0.8417, "step": 137290 }, { "epoch": 2.410505802419284, "grad_norm": 0.06217909365328076, "learning_rate": 0.00010984348412975854, "loss": 0.8421, "step": 137300 }, { "epoch": 2.4106813672992855, "grad_norm": 0.0716807151383372, "learning_rate": 0.00010983238499054665, "loss": 0.8428, "step": 137310 }, { "epoch": 2.410856932179287, "grad_norm": 0.055183387015115014, "learning_rate": 0.00010982128573942674, "loss": 0.8493, "step": 137320 }, { "epoch": 2.4110324970592885, "grad_norm": 0.07877735990619243, "learning_rate": 0.00010981018637653948, "loss": 0.8495, "step": 137330 }, { "epoch": 2.4112080619392895, "grad_norm": 0.05442651164128158, "learning_rate": 0.00010979908690202548, "loss": 0.8467, "step": 137340 }, { "epoch": 2.411383626819291, "grad_norm": 0.049765836891351105, "learning_rate": 0.00010978798731602535, "loss": 0.8478, "step": 137350 }, { "epoch": 2.4115591916992924, "grad_norm": 0.06499969749807086, "learning_rate": 0.00010977688761867974, "loss": 0.8418, "step": 137360 }, { "epoch": 2.411734756579294, "grad_norm": 0.05315738955496484, "learning_rate": 0.00010976578781012931, "loss": 0.8446, "step": 137370 }, { "epoch": 2.4119103214592954, "grad_norm": 0.0683135937470344, "learning_rate": 0.00010975468789051462, "loss": 0.8467, "step": 137380 }, { "epoch": 2.412085886339297, "grad_norm": 0.09313826373203969, "learning_rate": 0.00010974358785997636, "loss": 0.8424, "step": 137390 }, { "epoch": 2.412261451219298, "grad_norm": 0.045207812227350215, "learning_rate": 0.0001097324877186552, "loss": 0.8534, "step": 137400 }, { "epoch": 2.4124370160992994, "grad_norm": 0.04863290880923658, "learning_rate": 0.00010972138746669176, "loss": 0.8451, "step": 137410 }, { "epoch": 2.412612580979301, "grad_norm": 0.05623480549713209, "learning_rate": 0.00010971028710422664, "loss": 0.8476, "step": 137420 }, { "epoch": 2.4127881458593023, "grad_norm": 0.0813771175110812, "learning_rate": 0.00010969918663140051, "loss": 0.8412, "step": 137430 }, { "epoch": 2.412963710739304, "grad_norm": 0.0594118739339428, "learning_rate": 0.00010968808604835399, "loss": 0.8513, "step": 137440 }, { "epoch": 2.4131392756193053, "grad_norm": 0.055622568287697856, "learning_rate": 0.00010967698535522779, "loss": 0.8502, "step": 137450 }, { "epoch": 2.4133148404993063, "grad_norm": 0.0706794106239812, "learning_rate": 0.00010966588455216253, "loss": 0.8471, "step": 137460 }, { "epoch": 2.413490405379308, "grad_norm": 0.06544628029330891, "learning_rate": 0.00010965478363929887, "loss": 0.8488, "step": 137470 }, { "epoch": 2.4136659702593093, "grad_norm": 0.052368819078921346, "learning_rate": 0.0001096436826167774, "loss": 0.8424, "step": 137480 }, { "epoch": 2.4138415351393108, "grad_norm": 0.055819658827603494, "learning_rate": 0.0001096325814847388, "loss": 0.8475, "step": 137490 }, { "epoch": 2.4140171000193122, "grad_norm": 0.04618167160977969, "learning_rate": 0.00010962148024332377, "loss": 0.8436, "step": 137500 }, { "epoch": 2.4141926648993137, "grad_norm": 0.05332779842155563, "learning_rate": 0.00010961037889267292, "loss": 0.8463, "step": 137510 }, { "epoch": 2.4143682297793148, "grad_norm": 0.05099415388408288, "learning_rate": 0.00010959927743292692, "loss": 0.8474, "step": 137520 }, { "epoch": 2.4145437946593162, "grad_norm": 0.047012871928420664, "learning_rate": 0.00010958817586422647, "loss": 0.8398, "step": 137530 }, { "epoch": 2.4147193595393177, "grad_norm": 0.05631453885804718, "learning_rate": 0.00010957707418671215, "loss": 0.8395, "step": 137540 }, { "epoch": 2.414894924419319, "grad_norm": 0.05167799598608747, "learning_rate": 0.00010956597240052467, "loss": 0.8456, "step": 137550 }, { "epoch": 2.4150704892993207, "grad_norm": 0.053885540220461464, "learning_rate": 0.00010955487050580462, "loss": 0.8493, "step": 137560 }, { "epoch": 2.415246054179322, "grad_norm": 0.06935714929004408, "learning_rate": 0.00010954376850269279, "loss": 0.8395, "step": 137570 }, { "epoch": 2.415421619059323, "grad_norm": 0.04899185490967768, "learning_rate": 0.00010953266639132978, "loss": 0.8423, "step": 137580 }, { "epoch": 2.4155971839393247, "grad_norm": 0.06141146262070851, "learning_rate": 0.00010952156417185619, "loss": 0.8442, "step": 137590 }, { "epoch": 2.415772748819326, "grad_norm": 0.043345794454243025, "learning_rate": 0.00010951046184441278, "loss": 0.8493, "step": 137600 }, { "epoch": 2.4159483136993276, "grad_norm": 0.06558421885374151, "learning_rate": 0.0001094993594091402, "loss": 0.8364, "step": 137610 }, { "epoch": 2.416123878579329, "grad_norm": 0.04141281539465908, "learning_rate": 0.00010948825686617909, "loss": 0.8432, "step": 137620 }, { "epoch": 2.4162994434593306, "grad_norm": 0.06430256442221606, "learning_rate": 0.00010947715421567015, "loss": 0.8393, "step": 137630 }, { "epoch": 2.416475008339332, "grad_norm": 0.061156441247239096, "learning_rate": 0.00010946605145775406, "loss": 0.842, "step": 137640 }, { "epoch": 2.416650573219333, "grad_norm": 0.06514012065562758, "learning_rate": 0.00010945494859257144, "loss": 0.8434, "step": 137650 }, { "epoch": 2.4168261380993346, "grad_norm": 0.05191169820637229, "learning_rate": 0.00010944384562026302, "loss": 0.8375, "step": 137660 }, { "epoch": 2.417001702979336, "grad_norm": 0.0589274666194637, "learning_rate": 0.00010943274254096942, "loss": 0.8394, "step": 137670 }, { "epoch": 2.4171772678593375, "grad_norm": 0.05333641059194803, "learning_rate": 0.00010942163935483139, "loss": 0.8437, "step": 137680 }, { "epoch": 2.417352832739339, "grad_norm": 0.056984507489653814, "learning_rate": 0.00010941053606198957, "loss": 0.8376, "step": 137690 }, { "epoch": 2.41752839761934, "grad_norm": 0.04220848481465628, "learning_rate": 0.00010939943266258467, "loss": 0.8493, "step": 137700 }, { "epoch": 2.4177039624993415, "grad_norm": 0.052367185771480106, "learning_rate": 0.00010938832915675734, "loss": 0.8419, "step": 137710 }, { "epoch": 2.417879527379343, "grad_norm": 0.0624269061465369, "learning_rate": 0.00010937722554464825, "loss": 0.8443, "step": 137720 }, { "epoch": 2.4180550922593445, "grad_norm": 0.04436506430294018, "learning_rate": 0.0001093661218263981, "loss": 0.8401, "step": 137730 }, { "epoch": 2.418230657139346, "grad_norm": 0.048776321787250936, "learning_rate": 0.0001093550180021476, "loss": 0.8408, "step": 137740 }, { "epoch": 2.4184062220193474, "grad_norm": 0.0598307749118762, "learning_rate": 0.00010934391407203743, "loss": 0.8439, "step": 137750 }, { "epoch": 2.418581786899349, "grad_norm": 0.05092083486211644, "learning_rate": 0.00010933281003620828, "loss": 0.8504, "step": 137760 }, { "epoch": 2.41875735177935, "grad_norm": 0.05297384948193832, "learning_rate": 0.0001093217058948008, "loss": 0.8493, "step": 137770 }, { "epoch": 2.4189329166593514, "grad_norm": 0.07823944355204755, "learning_rate": 0.00010931060164795568, "loss": 0.8433, "step": 137780 }, { "epoch": 2.419108481539353, "grad_norm": 0.058034857688149005, "learning_rate": 0.00010929949729581373, "loss": 0.8468, "step": 137790 }, { "epoch": 2.4192840464193544, "grad_norm": 0.05614482938233714, "learning_rate": 0.0001092883928385155, "loss": 0.8428, "step": 137800 }, { "epoch": 2.419459611299356, "grad_norm": 0.05093003905662201, "learning_rate": 0.00010927728827620176, "loss": 0.8477, "step": 137810 }, { "epoch": 2.419635176179357, "grad_norm": 0.044323777039300426, "learning_rate": 0.0001092661836090132, "loss": 0.8412, "step": 137820 }, { "epoch": 2.4198107410593583, "grad_norm": 0.06150770896839833, "learning_rate": 0.0001092550788370905, "loss": 0.852, "step": 137830 }, { "epoch": 2.41998630593936, "grad_norm": 0.06995214578943805, "learning_rate": 0.00010924397396057435, "loss": 0.8442, "step": 137840 }, { "epoch": 2.4201618708193613, "grad_norm": 0.07743940182229744, "learning_rate": 0.00010923286897960549, "loss": 0.8473, "step": 137850 }, { "epoch": 2.4203374356993628, "grad_norm": 0.05766960517834828, "learning_rate": 0.00010922176389432459, "loss": 0.8455, "step": 137860 }, { "epoch": 2.4205130005793642, "grad_norm": 0.057060844045429084, "learning_rate": 0.00010921065870487239, "loss": 0.8501, "step": 137870 }, { "epoch": 2.4206885654593657, "grad_norm": 0.04531984300891696, "learning_rate": 0.00010919955341138956, "loss": 0.8523, "step": 137880 }, { "epoch": 2.4208641303393668, "grad_norm": 0.07073359225489485, "learning_rate": 0.00010918844801401682, "loss": 0.8461, "step": 137890 }, { "epoch": 2.4210396952193682, "grad_norm": 0.05894874647124465, "learning_rate": 0.00010917734251289487, "loss": 0.8466, "step": 137900 }, { "epoch": 2.4212152600993697, "grad_norm": 0.054769093696915384, "learning_rate": 0.00010916623690816441, "loss": 0.8442, "step": 137910 }, { "epoch": 2.421390824979371, "grad_norm": 0.05347828472886927, "learning_rate": 0.00010915513119996618, "loss": 0.8461, "step": 137920 }, { "epoch": 2.4215663898593727, "grad_norm": 0.05602197203030771, "learning_rate": 0.00010914402538844089, "loss": 0.8495, "step": 137930 }, { "epoch": 2.4217419547393737, "grad_norm": 0.05843190664775145, "learning_rate": 0.00010913291947372921, "loss": 0.846, "step": 137940 }, { "epoch": 2.421917519619375, "grad_norm": 0.05200004006533318, "learning_rate": 0.00010912181345597185, "loss": 0.8444, "step": 137950 }, { "epoch": 2.4220930844993767, "grad_norm": 0.057482003198123736, "learning_rate": 0.00010911070733530962, "loss": 0.85, "step": 137960 }, { "epoch": 2.422268649379378, "grad_norm": 0.05038662406360784, "learning_rate": 0.00010909960111188315, "loss": 0.8553, "step": 137970 }, { "epoch": 2.4224442142593796, "grad_norm": 0.07325178370770673, "learning_rate": 0.00010908849478583318, "loss": 0.844, "step": 137980 }, { "epoch": 2.422619779139381, "grad_norm": 0.058351686368922046, "learning_rate": 0.00010907738835730042, "loss": 0.8404, "step": 137990 }, { "epoch": 2.4227953440193826, "grad_norm": 0.055485165116309715, "learning_rate": 0.00010906628182642564, "loss": 0.8472, "step": 138000 }, { "epoch": 2.4229709088993836, "grad_norm": 0.05322701634391084, "learning_rate": 0.00010905517519334947, "loss": 0.8476, "step": 138010 }, { "epoch": 2.423146473779385, "grad_norm": 0.05764926840348585, "learning_rate": 0.00010904406845821269, "loss": 0.846, "step": 138020 }, { "epoch": 2.4233220386593866, "grad_norm": 0.06240239786299041, "learning_rate": 0.00010903296162115609, "loss": 0.8506, "step": 138030 }, { "epoch": 2.423497603539388, "grad_norm": 0.054827477483044, "learning_rate": 0.00010902185468232026, "loss": 0.8424, "step": 138040 }, { "epoch": 2.4236731684193895, "grad_norm": 0.07921768295258343, "learning_rate": 0.00010901074764184604, "loss": 0.8449, "step": 138050 }, { "epoch": 2.4238487332993905, "grad_norm": 0.07812693485227101, "learning_rate": 0.00010899964049987408, "loss": 0.842, "step": 138060 }, { "epoch": 2.424024298179392, "grad_norm": 0.047490383043254945, "learning_rate": 0.00010898853325654512, "loss": 0.8432, "step": 138070 }, { "epoch": 2.4241998630593935, "grad_norm": 0.0492352167445432, "learning_rate": 0.00010897742591199995, "loss": 0.8493, "step": 138080 }, { "epoch": 2.424375427939395, "grad_norm": 0.046154596573439725, "learning_rate": 0.00010896631846637926, "loss": 0.8508, "step": 138090 }, { "epoch": 2.4245509928193965, "grad_norm": 0.04809948811839458, "learning_rate": 0.00010895521091982382, "loss": 0.8464, "step": 138100 }, { "epoch": 2.424726557699398, "grad_norm": 0.04934400603525908, "learning_rate": 0.00010894410327247429, "loss": 0.8541, "step": 138110 }, { "epoch": 2.4249021225793994, "grad_norm": 0.06639905107877984, "learning_rate": 0.00010893299552447143, "loss": 0.8404, "step": 138120 }, { "epoch": 2.4250776874594004, "grad_norm": 0.053614020770851104, "learning_rate": 0.00010892188767595604, "loss": 0.8371, "step": 138130 }, { "epoch": 2.425253252339402, "grad_norm": 0.05408450238428727, "learning_rate": 0.0001089107797270688, "loss": 0.8465, "step": 138140 }, { "epoch": 2.4254288172194034, "grad_norm": 0.060776088957839605, "learning_rate": 0.00010889967167795047, "loss": 0.8426, "step": 138150 }, { "epoch": 2.425604382099405, "grad_norm": 0.05462283389736188, "learning_rate": 0.00010888856352874176, "loss": 0.8446, "step": 138160 }, { "epoch": 2.4257799469794064, "grad_norm": 0.05321385417809529, "learning_rate": 0.00010887745527958346, "loss": 0.8476, "step": 138170 }, { "epoch": 2.425955511859408, "grad_norm": 0.05533671203082974, "learning_rate": 0.00010886634693061629, "loss": 0.8533, "step": 138180 }, { "epoch": 2.426131076739409, "grad_norm": 0.046158008498947994, "learning_rate": 0.00010885523848198096, "loss": 0.8392, "step": 138190 }, { "epoch": 2.4263066416194103, "grad_norm": 0.05628961264539469, "learning_rate": 0.00010884412993381832, "loss": 0.8445, "step": 138200 }, { "epoch": 2.426482206499412, "grad_norm": 0.05272738777060443, "learning_rate": 0.00010883302128626902, "loss": 0.8437, "step": 138210 }, { "epoch": 2.4266577713794133, "grad_norm": 0.08007867138060688, "learning_rate": 0.00010882191253947383, "loss": 0.8494, "step": 138220 }, { "epoch": 2.4268333362594148, "grad_norm": 0.0846876018925317, "learning_rate": 0.00010881080369357352, "loss": 0.8462, "step": 138230 }, { "epoch": 2.4270089011394163, "grad_norm": 0.05631248347329398, "learning_rate": 0.00010879969474870881, "loss": 0.8368, "step": 138240 }, { "epoch": 2.4271844660194173, "grad_norm": 0.056426035658709725, "learning_rate": 0.00010878858570502051, "loss": 0.8472, "step": 138250 }, { "epoch": 2.4273600308994188, "grad_norm": 0.06700693071516357, "learning_rate": 0.00010877747656264932, "loss": 0.8298, "step": 138260 }, { "epoch": 2.4275355957794202, "grad_norm": 0.046203705445673385, "learning_rate": 0.00010876636732173601, "loss": 0.8423, "step": 138270 }, { "epoch": 2.4277111606594217, "grad_norm": 0.06081266326374724, "learning_rate": 0.00010875525798242133, "loss": 0.835, "step": 138280 }, { "epoch": 2.427886725539423, "grad_norm": 0.043419630540682146, "learning_rate": 0.00010874414854484602, "loss": 0.8427, "step": 138290 }, { "epoch": 2.4280622904194247, "grad_norm": 0.06109734452442898, "learning_rate": 0.00010873303900915091, "loss": 0.8546, "step": 138300 }, { "epoch": 2.428237855299426, "grad_norm": 0.08554928424820614, "learning_rate": 0.00010872192937547671, "loss": 0.8514, "step": 138310 }, { "epoch": 2.428413420179427, "grad_norm": 0.087710344410742, "learning_rate": 0.00010871081964396417, "loss": 0.8397, "step": 138320 }, { "epoch": 2.4285889850594287, "grad_norm": 0.0694703819632762, "learning_rate": 0.00010869970981475406, "loss": 0.8472, "step": 138330 }, { "epoch": 2.42876454993943, "grad_norm": 0.053181878369814105, "learning_rate": 0.00010868859988798719, "loss": 0.8458, "step": 138340 }, { "epoch": 2.4289401148194316, "grad_norm": 0.053271564383928265, "learning_rate": 0.00010867748986380424, "loss": 0.8502, "step": 138350 }, { "epoch": 2.429115679699433, "grad_norm": 0.053743979473739385, "learning_rate": 0.00010866637974234601, "loss": 0.845, "step": 138360 }, { "epoch": 2.429291244579434, "grad_norm": 0.04902952535495593, "learning_rate": 0.00010865526952375333, "loss": 0.8333, "step": 138370 }, { "epoch": 2.4294668094594356, "grad_norm": 0.05206661213510157, "learning_rate": 0.00010864415920816691, "loss": 0.8454, "step": 138380 }, { "epoch": 2.429642374339437, "grad_norm": 0.050437745815659116, "learning_rate": 0.00010863304879572751, "loss": 0.8428, "step": 138390 }, { "epoch": 2.4298179392194386, "grad_norm": 0.052475786781606346, "learning_rate": 0.00010862193828657589, "loss": 0.8425, "step": 138400 }, { "epoch": 2.42999350409944, "grad_norm": 0.05377933442372619, "learning_rate": 0.00010861082768085288, "loss": 0.8436, "step": 138410 }, { "epoch": 2.4301690689794415, "grad_norm": 0.0600431074788126, "learning_rate": 0.00010859971697869927, "loss": 0.8506, "step": 138420 }, { "epoch": 2.430344633859443, "grad_norm": 0.0532063678979175, "learning_rate": 0.00010858860618025574, "loss": 0.8479, "step": 138430 }, { "epoch": 2.430520198739444, "grad_norm": 0.052102157859988146, "learning_rate": 0.00010857749528566312, "loss": 0.8504, "step": 138440 }, { "epoch": 2.4306957636194455, "grad_norm": 0.050785319420587165, "learning_rate": 0.00010856638429506218, "loss": 0.8506, "step": 138450 }, { "epoch": 2.430871328499447, "grad_norm": 0.056421863092653575, "learning_rate": 0.0001085552732085937, "loss": 0.8414, "step": 138460 }, { "epoch": 2.4310468933794485, "grad_norm": 0.05989873991400742, "learning_rate": 0.00010854416202639848, "loss": 0.8464, "step": 138470 }, { "epoch": 2.43122245825945, "grad_norm": 0.056071831789920874, "learning_rate": 0.00010853305074861728, "loss": 0.8449, "step": 138480 }, { "epoch": 2.431398023139451, "grad_norm": 0.0572346362888503, "learning_rate": 0.00010852193937539088, "loss": 0.8475, "step": 138490 }, { "epoch": 2.4315735880194524, "grad_norm": 0.07778943840793782, "learning_rate": 0.00010851082790686004, "loss": 0.8444, "step": 138500 }, { "epoch": 2.431749152899454, "grad_norm": 0.057627103494665204, "learning_rate": 0.0001084997163431656, "loss": 0.8511, "step": 138510 }, { "epoch": 2.4319247177794554, "grad_norm": 0.06445006049028995, "learning_rate": 0.00010848860468444827, "loss": 0.846, "step": 138520 }, { "epoch": 2.432100282659457, "grad_norm": 0.05730500456016436, "learning_rate": 0.00010847749293084893, "loss": 0.8474, "step": 138530 }, { "epoch": 2.4322758475394584, "grad_norm": 0.06199419574661729, "learning_rate": 0.00010846638108250832, "loss": 0.8364, "step": 138540 }, { "epoch": 2.43245141241946, "grad_norm": 0.05458843134993274, "learning_rate": 0.00010845526913956723, "loss": 0.8487, "step": 138550 }, { "epoch": 2.432626977299461, "grad_norm": 0.05813834066398378, "learning_rate": 0.00010844415710216645, "loss": 0.846, "step": 138560 }, { "epoch": 2.4328025421794623, "grad_norm": 0.044287339589768764, "learning_rate": 0.00010843304497044677, "loss": 0.8471, "step": 138570 }, { "epoch": 2.432978107059464, "grad_norm": 0.07440425960765305, "learning_rate": 0.00010842193274454895, "loss": 0.8444, "step": 138580 }, { "epoch": 2.4331536719394653, "grad_norm": 0.06391839244061702, "learning_rate": 0.00010841082042461388, "loss": 0.8488, "step": 138590 }, { "epoch": 2.433329236819467, "grad_norm": 0.05113292306584992, "learning_rate": 0.00010839970801078225, "loss": 0.8466, "step": 138600 }, { "epoch": 2.433504801699468, "grad_norm": 0.06527200167768382, "learning_rate": 0.00010838859550319494, "loss": 0.8421, "step": 138610 }, { "epoch": 2.4336803665794693, "grad_norm": 0.05870857530578415, "learning_rate": 0.00010837748290199268, "loss": 0.8423, "step": 138620 }, { "epoch": 2.4338559314594708, "grad_norm": 0.05305169036511596, "learning_rate": 0.00010836637020731629, "loss": 0.8397, "step": 138630 }, { "epoch": 2.4340314963394722, "grad_norm": 0.06642253974267923, "learning_rate": 0.00010835525741930662, "loss": 0.8485, "step": 138640 }, { "epoch": 2.4342070612194737, "grad_norm": 0.05057135410593122, "learning_rate": 0.0001083441445381044, "loss": 0.8539, "step": 138650 }, { "epoch": 2.434382626099475, "grad_norm": 0.049702915419726185, "learning_rate": 0.00010833303156385046, "loss": 0.8475, "step": 138660 }, { "epoch": 2.4345581909794767, "grad_norm": 0.0602290539510732, "learning_rate": 0.0001083219184966856, "loss": 0.8462, "step": 138670 }, { "epoch": 2.4347337558594777, "grad_norm": 0.05529993230809688, "learning_rate": 0.00010831080533675064, "loss": 0.853, "step": 138680 }, { "epoch": 2.434909320739479, "grad_norm": 0.06519048696358372, "learning_rate": 0.00010829969208418636, "loss": 0.8483, "step": 138690 }, { "epoch": 2.4350848856194807, "grad_norm": 0.06266686083880009, "learning_rate": 0.00010828857873913358, "loss": 0.8329, "step": 138700 }, { "epoch": 2.435260450499482, "grad_norm": 0.058928798039583584, "learning_rate": 0.00010827746530173313, "loss": 0.846, "step": 138710 }, { "epoch": 2.4354360153794836, "grad_norm": 0.05565180161374259, "learning_rate": 0.0001082663517721258, "loss": 0.8368, "step": 138720 }, { "epoch": 2.4356115802594847, "grad_norm": 0.07189555444597007, "learning_rate": 0.0001082552381504524, "loss": 0.8453, "step": 138730 }, { "epoch": 2.435787145139486, "grad_norm": 0.0616172225787174, "learning_rate": 0.00010824412443685374, "loss": 0.8448, "step": 138740 }, { "epoch": 2.4359627100194876, "grad_norm": 0.05349570470519258, "learning_rate": 0.00010823301063147062, "loss": 0.8463, "step": 138750 }, { "epoch": 2.436138274899489, "grad_norm": 0.04581866055354856, "learning_rate": 0.00010822189673444387, "loss": 0.8439, "step": 138760 }, { "epoch": 2.4363138397794906, "grad_norm": 0.04258477454204033, "learning_rate": 0.00010821078274591435, "loss": 0.8418, "step": 138770 }, { "epoch": 2.436489404659492, "grad_norm": 0.053990462507345606, "learning_rate": 0.0001081996686660228, "loss": 0.8359, "step": 138780 }, { "epoch": 2.4366649695394935, "grad_norm": 0.09678792780292192, "learning_rate": 0.00010818855449491003, "loss": 0.8431, "step": 138790 }, { "epoch": 2.4368405344194946, "grad_norm": 0.05567909323027837, "learning_rate": 0.00010817744023271694, "loss": 0.8443, "step": 138800 }, { "epoch": 2.437016099299496, "grad_norm": 0.04944176214765995, "learning_rate": 0.00010816632587958431, "loss": 0.8424, "step": 138810 }, { "epoch": 2.4371916641794975, "grad_norm": 0.05211883478441271, "learning_rate": 0.00010815521143565295, "loss": 0.8507, "step": 138820 }, { "epoch": 2.437367229059499, "grad_norm": 0.045807717631084474, "learning_rate": 0.00010814409690106371, "loss": 0.8419, "step": 138830 }, { "epoch": 2.4375427939395005, "grad_norm": 0.09739799324175519, "learning_rate": 0.00010813298227595734, "loss": 0.8514, "step": 138840 }, { "epoch": 2.437718358819502, "grad_norm": 0.04707167684635818, "learning_rate": 0.00010812186756047476, "loss": 0.843, "step": 138850 }, { "epoch": 2.437893923699503, "grad_norm": 0.04184921716950721, "learning_rate": 0.0001081107527547567, "loss": 0.8459, "step": 138860 }, { "epoch": 2.4380694885795045, "grad_norm": 0.05181462061405836, "learning_rate": 0.00010809963785894409, "loss": 0.8452, "step": 138870 }, { "epoch": 2.438245053459506, "grad_norm": 0.0620588740858952, "learning_rate": 0.0001080885228731777, "loss": 0.8434, "step": 138880 }, { "epoch": 2.4384206183395074, "grad_norm": 0.05403583369822361, "learning_rate": 0.00010807740779759839, "loss": 0.8459, "step": 138890 }, { "epoch": 2.438596183219509, "grad_norm": 0.061560158492213364, "learning_rate": 0.00010806629263234693, "loss": 0.8411, "step": 138900 }, { "epoch": 2.4387717480995104, "grad_norm": 0.04772474756899504, "learning_rate": 0.00010805517737756418, "loss": 0.8483, "step": 138910 }, { "epoch": 2.4389473129795114, "grad_norm": 0.06488691608735811, "learning_rate": 0.000108044062033391, "loss": 0.8422, "step": 138920 }, { "epoch": 2.439122877859513, "grad_norm": 0.06994295193812318, "learning_rate": 0.00010803294659996821, "loss": 0.8362, "step": 138930 }, { "epoch": 2.4392984427395143, "grad_norm": 0.058258748373833054, "learning_rate": 0.00010802183107743665, "loss": 0.8428, "step": 138940 }, { "epoch": 2.439474007619516, "grad_norm": 0.06301556021352936, "learning_rate": 0.00010801071546593712, "loss": 0.8354, "step": 138950 }, { "epoch": 2.4396495724995173, "grad_norm": 0.059281168686661945, "learning_rate": 0.00010799959976561047, "loss": 0.8423, "step": 138960 }, { "epoch": 2.439825137379519, "grad_norm": 0.0857164381142839, "learning_rate": 0.00010798848397659755, "loss": 0.8394, "step": 138970 }, { "epoch": 2.44000070225952, "grad_norm": 0.0608753016102415, "learning_rate": 0.00010797736809903925, "loss": 0.8465, "step": 138980 }, { "epoch": 2.4401762671395213, "grad_norm": 0.047531584864686637, "learning_rate": 0.00010796625213307632, "loss": 0.8377, "step": 138990 }, { "epoch": 2.4403518320195228, "grad_norm": 0.06432272759229787, "learning_rate": 0.00010795513607884964, "loss": 0.8348, "step": 139000 }, { "epoch": 2.4405273968995242, "grad_norm": 0.07825750149159091, "learning_rate": 0.00010794401993650008, "loss": 0.8465, "step": 139010 }, { "epoch": 2.4407029617795257, "grad_norm": 0.04858011688216637, "learning_rate": 0.00010793290370616845, "loss": 0.8459, "step": 139020 }, { "epoch": 2.440878526659527, "grad_norm": 0.06753189955099899, "learning_rate": 0.00010792178738799554, "loss": 0.8415, "step": 139030 }, { "epoch": 2.4410540915395282, "grad_norm": 0.05209288799463472, "learning_rate": 0.00010791067098212228, "loss": 0.8484, "step": 139040 }, { "epoch": 2.4412296564195297, "grad_norm": 0.059128543351422376, "learning_rate": 0.00010789955448868953, "loss": 0.8471, "step": 139050 }, { "epoch": 2.441405221299531, "grad_norm": 0.05691539585431973, "learning_rate": 0.00010788843790783812, "loss": 0.8415, "step": 139060 }, { "epoch": 2.4415807861795327, "grad_norm": 0.05521125928816694, "learning_rate": 0.00010787732123970886, "loss": 0.8439, "step": 139070 }, { "epoch": 2.441756351059534, "grad_norm": 0.05256589261484946, "learning_rate": 0.0001078662044844426, "loss": 0.8407, "step": 139080 }, { "epoch": 2.4419319159395356, "grad_norm": 0.046783484466326526, "learning_rate": 0.00010785508764218022, "loss": 0.8399, "step": 139090 }, { "epoch": 2.442107480819537, "grad_norm": 0.05762469981618379, "learning_rate": 0.00010784397071306257, "loss": 0.8409, "step": 139100 }, { "epoch": 2.442283045699538, "grad_norm": 0.05040480210817086, "learning_rate": 0.00010783285369723051, "loss": 0.843, "step": 139110 }, { "epoch": 2.4424586105795396, "grad_norm": 0.05027694423494546, "learning_rate": 0.00010782173659482489, "loss": 0.8459, "step": 139120 }, { "epoch": 2.442634175459541, "grad_norm": 0.07476427061334603, "learning_rate": 0.00010781061940598653, "loss": 0.8423, "step": 139130 }, { "epoch": 2.4428097403395426, "grad_norm": 0.06867658105582809, "learning_rate": 0.00010779950213085632, "loss": 0.8433, "step": 139140 }, { "epoch": 2.442985305219544, "grad_norm": 0.04846270962573446, "learning_rate": 0.00010778838476957513, "loss": 0.8406, "step": 139150 }, { "epoch": 2.443160870099545, "grad_norm": 0.059248974462294525, "learning_rate": 0.00010777726732228381, "loss": 0.8457, "step": 139160 }, { "epoch": 2.4433364349795466, "grad_norm": 0.056016148580251904, "learning_rate": 0.00010776614978912323, "loss": 0.8593, "step": 139170 }, { "epoch": 2.443511999859548, "grad_norm": 0.06072270956571663, "learning_rate": 0.0001077550321702342, "loss": 0.8384, "step": 139180 }, { "epoch": 2.4436875647395495, "grad_norm": 0.057867928940824265, "learning_rate": 0.00010774391446575762, "loss": 0.8404, "step": 139190 }, { "epoch": 2.443863129619551, "grad_norm": 0.05580598281208407, "learning_rate": 0.00010773279667583433, "loss": 0.8378, "step": 139200 }, { "epoch": 2.4440386944995525, "grad_norm": 0.06671792313698893, "learning_rate": 0.00010772167880060524, "loss": 0.839, "step": 139210 }, { "epoch": 2.444214259379554, "grad_norm": 0.061139927499415256, "learning_rate": 0.00010771056084021121, "loss": 0.8467, "step": 139220 }, { "epoch": 2.444389824259555, "grad_norm": 0.07874127769888122, "learning_rate": 0.00010769944279479309, "loss": 0.8455, "step": 139230 }, { "epoch": 2.4445653891395565, "grad_norm": 0.0503824821463882, "learning_rate": 0.00010768832466449175, "loss": 0.8455, "step": 139240 }, { "epoch": 2.444740954019558, "grad_norm": 0.05877565760474133, "learning_rate": 0.00010767720644944804, "loss": 0.8466, "step": 139250 }, { "epoch": 2.4449165188995594, "grad_norm": 0.07637042788921777, "learning_rate": 0.00010766608814980281, "loss": 0.845, "step": 139260 }, { "epoch": 2.445092083779561, "grad_norm": 0.08255299071877574, "learning_rate": 0.000107654969765697, "loss": 0.8379, "step": 139270 }, { "epoch": 2.445267648659562, "grad_norm": 0.07344950242724003, "learning_rate": 0.00010764385129727146, "loss": 0.8472, "step": 139280 }, { "epoch": 2.4454432135395634, "grad_norm": 0.05031710871691274, "learning_rate": 0.00010763273274466703, "loss": 0.8478, "step": 139290 }, { "epoch": 2.445618778419565, "grad_norm": 0.06988529647182527, "learning_rate": 0.00010762161410802461, "loss": 0.8459, "step": 139300 }, { "epoch": 2.4457943432995664, "grad_norm": 0.06309485769650092, "learning_rate": 0.00010761049538748504, "loss": 0.8421, "step": 139310 }, { "epoch": 2.445969908179568, "grad_norm": 0.05746105316261229, "learning_rate": 0.00010759937658318929, "loss": 0.8397, "step": 139320 }, { "epoch": 2.4461454730595693, "grad_norm": 0.06613815557750234, "learning_rate": 0.00010758825769527814, "loss": 0.8376, "step": 139330 }, { "epoch": 2.446321037939571, "grad_norm": 0.04629324957719398, "learning_rate": 0.0001075771387238925, "loss": 0.8369, "step": 139340 }, { "epoch": 2.446496602819572, "grad_norm": 0.059800457752016964, "learning_rate": 0.00010756601966917325, "loss": 0.8503, "step": 139350 }, { "epoch": 2.4466721676995733, "grad_norm": 0.06894743527251784, "learning_rate": 0.00010755490053126129, "loss": 0.8402, "step": 139360 }, { "epoch": 2.4468477325795748, "grad_norm": 0.05899519968451426, "learning_rate": 0.00010754378131029745, "loss": 0.8431, "step": 139370 }, { "epoch": 2.4470232974595763, "grad_norm": 0.04389934922132425, "learning_rate": 0.00010753266200642266, "loss": 0.8407, "step": 139380 }, { "epoch": 2.4471988623395777, "grad_norm": 0.07835992249271169, "learning_rate": 0.00010752154261977782, "loss": 0.8445, "step": 139390 }, { "epoch": 2.4473744272195788, "grad_norm": 0.06022488531460472, "learning_rate": 0.00010751042315050376, "loss": 0.8481, "step": 139400 }, { "epoch": 2.4475499920995802, "grad_norm": 0.05457302462371137, "learning_rate": 0.00010749930359874141, "loss": 0.8436, "step": 139410 }, { "epoch": 2.4477255569795817, "grad_norm": 0.057151945453919675, "learning_rate": 0.00010748818396463161, "loss": 0.8409, "step": 139420 }, { "epoch": 2.447901121859583, "grad_norm": 0.0663709911507082, "learning_rate": 0.00010747706424831527, "loss": 0.8446, "step": 139430 }, { "epoch": 2.4480766867395847, "grad_norm": 0.07443112275834046, "learning_rate": 0.00010746594444993331, "loss": 0.8482, "step": 139440 }, { "epoch": 2.448252251619586, "grad_norm": 0.04682379529695092, "learning_rate": 0.00010745482456962661, "loss": 0.8474, "step": 139450 }, { "epoch": 2.4484278164995876, "grad_norm": 0.054350496034104696, "learning_rate": 0.00010744370460753604, "loss": 0.8431, "step": 139460 }, { "epoch": 2.4486033813795887, "grad_norm": 0.04889709139767008, "learning_rate": 0.00010743258456380246, "loss": 0.8436, "step": 139470 }, { "epoch": 2.44877894625959, "grad_norm": 0.06502153926833601, "learning_rate": 0.0001074214644385668, "loss": 0.8443, "step": 139480 }, { "epoch": 2.4489545111395916, "grad_norm": 0.05635381529994116, "learning_rate": 0.00010741034423196999, "loss": 0.8557, "step": 139490 }, { "epoch": 2.449130076019593, "grad_norm": 0.08438780419698313, "learning_rate": 0.00010739922394415287, "loss": 0.839, "step": 139500 }, { "epoch": 2.4493056408995946, "grad_norm": 0.0571624825043205, "learning_rate": 0.00010738810357525637, "loss": 0.8422, "step": 139510 }, { "epoch": 2.4494812057795956, "grad_norm": 0.05011519750369978, "learning_rate": 0.00010737698312542135, "loss": 0.8472, "step": 139520 }, { "epoch": 2.449656770659597, "grad_norm": 0.04631353702673392, "learning_rate": 0.00010736586259478876, "loss": 0.8442, "step": 139530 }, { "epoch": 2.4498323355395986, "grad_norm": 0.06406254991843116, "learning_rate": 0.00010735474198349946, "loss": 0.8364, "step": 139540 }, { "epoch": 2.4500079004196, "grad_norm": 0.0486210269555218, "learning_rate": 0.00010734362129169435, "loss": 0.8464, "step": 139550 }, { "epoch": 2.4501834652996015, "grad_norm": 0.05889301449051244, "learning_rate": 0.00010733250051951435, "loss": 0.844, "step": 139560 }, { "epoch": 2.450359030179603, "grad_norm": 0.048333387941989346, "learning_rate": 0.00010732137966710037, "loss": 0.84, "step": 139570 }, { "epoch": 2.4505345950596045, "grad_norm": 0.04713292081938731, "learning_rate": 0.00010731025873459328, "loss": 0.8451, "step": 139580 }, { "epoch": 2.4507101599396055, "grad_norm": 0.056924708073910736, "learning_rate": 0.00010729913772213401, "loss": 0.839, "step": 139590 }, { "epoch": 2.450885724819607, "grad_norm": 0.06018799719979223, "learning_rate": 0.00010728801662986343, "loss": 0.8451, "step": 139600 }, { "epoch": 2.4510612896996085, "grad_norm": 0.0408115932220194, "learning_rate": 0.0001072768954579225, "loss": 0.8402, "step": 139610 }, { "epoch": 2.45123685457961, "grad_norm": 0.04433150980285789, "learning_rate": 0.00010726577420645215, "loss": 0.851, "step": 139620 }, { "epoch": 2.4514124194596114, "grad_norm": 0.04736806717660565, "learning_rate": 0.00010725465287559317, "loss": 0.8488, "step": 139630 }, { "epoch": 2.451587984339613, "grad_norm": 0.06159540430011085, "learning_rate": 0.00010724353146548656, "loss": 0.8448, "step": 139640 }, { "epoch": 2.451763549219614, "grad_norm": 0.062310198753876334, "learning_rate": 0.00010723240997627319, "loss": 0.8424, "step": 139650 }, { "epoch": 2.4519391140996154, "grad_norm": 0.04915562763322431, "learning_rate": 0.00010722128840809402, "loss": 0.8491, "step": 139660 }, { "epoch": 2.452114678979617, "grad_norm": 0.04448373855444157, "learning_rate": 0.00010721016676108992, "loss": 0.8504, "step": 139670 }, { "epoch": 2.4522902438596184, "grad_norm": 0.0497512621092882, "learning_rate": 0.00010719904503540186, "loss": 0.8433, "step": 139680 }, { "epoch": 2.45246580873962, "grad_norm": 0.05908373833454216, "learning_rate": 0.00010718792323117063, "loss": 0.8479, "step": 139690 }, { "epoch": 2.4526413736196213, "grad_norm": 0.05822968605100334, "learning_rate": 0.00010717680134853731, "loss": 0.8424, "step": 139700 }, { "epoch": 2.4528169384996223, "grad_norm": 0.07430365054800442, "learning_rate": 0.00010716567938764268, "loss": 0.8467, "step": 139710 }, { "epoch": 2.452992503379624, "grad_norm": 0.05164788271239653, "learning_rate": 0.0001071545573486277, "loss": 0.845, "step": 139720 }, { "epoch": 2.4531680682596253, "grad_norm": 0.052325976726875355, "learning_rate": 0.00010714343523163333, "loss": 0.8399, "step": 139730 }, { "epoch": 2.453343633139627, "grad_norm": 0.044152130008538966, "learning_rate": 0.00010713231303680045, "loss": 0.8472, "step": 139740 }, { "epoch": 2.4535191980196283, "grad_norm": 0.067258984639649, "learning_rate": 0.00010712119076427, "loss": 0.8453, "step": 139750 }, { "epoch": 2.4536947628996297, "grad_norm": 0.06618982590503078, "learning_rate": 0.00010711006841418287, "loss": 0.8379, "step": 139760 }, { "epoch": 2.453870327779631, "grad_norm": 0.07709192464954004, "learning_rate": 0.00010709894598668, "loss": 0.8421, "step": 139770 }, { "epoch": 2.4540458926596322, "grad_norm": 0.05057556913178446, "learning_rate": 0.00010708782348190231, "loss": 0.8491, "step": 139780 }, { "epoch": 2.4542214575396337, "grad_norm": 0.048908212356533864, "learning_rate": 0.00010707670089999077, "loss": 0.8443, "step": 139790 }, { "epoch": 2.454397022419635, "grad_norm": 0.04795269682403395, "learning_rate": 0.00010706557824108627, "loss": 0.8391, "step": 139800 }, { "epoch": 2.4545725872996367, "grad_norm": 0.05172809334854762, "learning_rate": 0.00010705445550532969, "loss": 0.8458, "step": 139810 }, { "epoch": 2.454748152179638, "grad_norm": 0.07085929201686694, "learning_rate": 0.000107043332692862, "loss": 0.844, "step": 139820 }, { "epoch": 2.454923717059639, "grad_norm": 0.06059013155531974, "learning_rate": 0.00010703220980382414, "loss": 0.8456, "step": 139830 }, { "epoch": 2.4550992819396407, "grad_norm": 0.06529095887593535, "learning_rate": 0.00010702108683835703, "loss": 0.8441, "step": 139840 }, { "epoch": 2.455274846819642, "grad_norm": 0.0690165900313399, "learning_rate": 0.00010700996379660161, "loss": 0.8414, "step": 139850 }, { "epoch": 2.4554504116996436, "grad_norm": 0.06595988284417453, "learning_rate": 0.00010699884067869874, "loss": 0.8431, "step": 139860 }, { "epoch": 2.455625976579645, "grad_norm": 0.06283731092037313, "learning_rate": 0.00010698771748478949, "loss": 0.8493, "step": 139870 }, { "epoch": 2.4558015414596466, "grad_norm": 0.08066593643948584, "learning_rate": 0.00010697659421501462, "loss": 0.8393, "step": 139880 }, { "epoch": 2.455977106339648, "grad_norm": 0.052301542163509955, "learning_rate": 0.0001069654708695152, "loss": 0.8472, "step": 139890 }, { "epoch": 2.456152671219649, "grad_norm": 0.04623195786201849, "learning_rate": 0.00010695434744843213, "loss": 0.8402, "step": 139900 }, { "epoch": 2.4563282360996506, "grad_norm": 0.0805115226138945, "learning_rate": 0.00010694322395190635, "loss": 0.8438, "step": 139910 }, { "epoch": 2.456503800979652, "grad_norm": 0.06026044303460531, "learning_rate": 0.00010693210038007877, "loss": 0.8475, "step": 139920 }, { "epoch": 2.4566793658596535, "grad_norm": 0.05078631746966634, "learning_rate": 0.00010692097673309034, "loss": 0.8502, "step": 139930 }, { "epoch": 2.456854930739655, "grad_norm": 0.0643617932772678, "learning_rate": 0.00010690985301108196, "loss": 0.8466, "step": 139940 }, { "epoch": 2.457030495619656, "grad_norm": 0.06556317712456211, "learning_rate": 0.00010689872921419466, "loss": 0.8417, "step": 139950 }, { "epoch": 2.4572060604996575, "grad_norm": 0.07345260139864503, "learning_rate": 0.00010688760534256932, "loss": 0.8424, "step": 139960 }, { "epoch": 2.457381625379659, "grad_norm": 0.05073824527104381, "learning_rate": 0.00010687648139634692, "loss": 0.8392, "step": 139970 }, { "epoch": 2.4575571902596605, "grad_norm": 0.06503089392318616, "learning_rate": 0.00010686535737566831, "loss": 0.8454, "step": 139980 }, { "epoch": 2.457732755139662, "grad_norm": 0.048056896387641136, "learning_rate": 0.00010685423328067453, "loss": 0.8402, "step": 139990 }, { "epoch": 2.4579083200196634, "grad_norm": 0.04976786769663579, "learning_rate": 0.0001068431091115065, "loss": 0.8443, "step": 140000 }, { "epoch": 2.458083884899665, "grad_norm": 0.07369271714501902, "learning_rate": 0.00010683198486830515, "loss": 0.8433, "step": 140010 }, { "epoch": 2.458259449779666, "grad_norm": 0.06083022212511011, "learning_rate": 0.00010682086055121144, "loss": 0.8459, "step": 140020 }, { "epoch": 2.4584350146596674, "grad_norm": 0.04824501791334105, "learning_rate": 0.0001068097361603663, "loss": 0.8382, "step": 140030 }, { "epoch": 2.458610579539669, "grad_norm": 0.07646471778210112, "learning_rate": 0.00010679861169591068, "loss": 0.8445, "step": 140040 }, { "epoch": 2.4587861444196704, "grad_norm": 0.051661833613632316, "learning_rate": 0.00010678748715798556, "loss": 0.8414, "step": 140050 }, { "epoch": 2.458961709299672, "grad_norm": 0.04688285571207312, "learning_rate": 0.00010677636254673185, "loss": 0.8429, "step": 140060 }, { "epoch": 2.459137274179673, "grad_norm": 0.06336325371876984, "learning_rate": 0.00010676523786229053, "loss": 0.8517, "step": 140070 }, { "epoch": 2.4593128390596743, "grad_norm": 0.05001639058052521, "learning_rate": 0.00010675411310480254, "loss": 0.8328, "step": 140080 }, { "epoch": 2.459488403939676, "grad_norm": 0.06906854364577189, "learning_rate": 0.00010674298827440884, "loss": 0.8481, "step": 140090 }, { "epoch": 2.4596639688196773, "grad_norm": 0.056591379712198964, "learning_rate": 0.00010673186337125038, "loss": 0.8475, "step": 140100 }, { "epoch": 2.459839533699679, "grad_norm": 0.06364305838206809, "learning_rate": 0.0001067207383954681, "loss": 0.8457, "step": 140110 }, { "epoch": 2.4600150985796803, "grad_norm": 0.056824893808396566, "learning_rate": 0.000106709613347203, "loss": 0.8441, "step": 140120 }, { "epoch": 2.4601906634596817, "grad_norm": 0.05906092460069395, "learning_rate": 0.00010669848822659597, "loss": 0.8375, "step": 140130 }, { "epoch": 2.4603662283396828, "grad_norm": 0.06818137287497983, "learning_rate": 0.00010668736303378801, "loss": 0.8326, "step": 140140 }, { "epoch": 2.4605417932196842, "grad_norm": 0.05768943424607159, "learning_rate": 0.00010667623776892008, "loss": 0.8406, "step": 140150 }, { "epoch": 2.4607173580996857, "grad_norm": 0.058497698834913466, "learning_rate": 0.00010666511243213311, "loss": 0.8469, "step": 140160 }, { "epoch": 2.460892922979687, "grad_norm": 0.05264638796787191, "learning_rate": 0.00010665398702356809, "loss": 0.8477, "step": 140170 }, { "epoch": 2.4610684878596887, "grad_norm": 0.0456203242374375, "learning_rate": 0.00010664286154336599, "loss": 0.8468, "step": 140180 }, { "epoch": 2.4612440527396897, "grad_norm": 0.05136684409329226, "learning_rate": 0.00010663173599166773, "loss": 0.8481, "step": 140190 }, { "epoch": 2.461419617619691, "grad_norm": 0.057581485687536765, "learning_rate": 0.0001066206103686143, "loss": 0.8435, "step": 140200 }, { "epoch": 2.4615951824996927, "grad_norm": 0.05018572870773924, "learning_rate": 0.00010660948467434669, "loss": 0.8426, "step": 140210 }, { "epoch": 2.461770747379694, "grad_norm": 0.04593932196783975, "learning_rate": 0.00010659835890900579, "loss": 0.8423, "step": 140220 }, { "epoch": 2.4619463122596956, "grad_norm": 0.04627495889242524, "learning_rate": 0.0001065872330727326, "loss": 0.8381, "step": 140230 }, { "epoch": 2.462121877139697, "grad_norm": 0.04915499944820688, "learning_rate": 0.00010657610716566812, "loss": 0.8445, "step": 140240 }, { "epoch": 2.4622974420196986, "grad_norm": 0.05943464754398215, "learning_rate": 0.0001065649811879533, "loss": 0.846, "step": 140250 }, { "epoch": 2.4624730068996996, "grad_norm": 0.07062770142553829, "learning_rate": 0.00010655385513972914, "loss": 0.8496, "step": 140260 }, { "epoch": 2.462648571779701, "grad_norm": 0.040124996450011524, "learning_rate": 0.00010654272902113649, "loss": 0.8463, "step": 140270 }, { "epoch": 2.4628241366597026, "grad_norm": 0.05792174367063797, "learning_rate": 0.00010653160283231643, "loss": 0.8491, "step": 140280 }, { "epoch": 2.462999701539704, "grad_norm": 0.05505362131202561, "learning_rate": 0.00010652047657340992, "loss": 0.8424, "step": 140290 }, { "epoch": 2.4631752664197055, "grad_norm": 0.061239062437517686, "learning_rate": 0.0001065093502445579, "loss": 0.8469, "step": 140300 }, { "epoch": 2.463350831299707, "grad_norm": 0.0631183649163558, "learning_rate": 0.00010649822384590138, "loss": 0.84, "step": 140310 }, { "epoch": 2.463526396179708, "grad_norm": 0.056628513637462685, "learning_rate": 0.00010648709737758127, "loss": 0.8448, "step": 140320 }, { "epoch": 2.4637019610597095, "grad_norm": 0.06305001375193917, "learning_rate": 0.00010647597083973861, "loss": 0.8427, "step": 140330 }, { "epoch": 2.463877525939711, "grad_norm": 0.05824184638462682, "learning_rate": 0.00010646484423251433, "loss": 0.8372, "step": 140340 }, { "epoch": 2.4640530908197125, "grad_norm": 0.051284903487539106, "learning_rate": 0.00010645371755604944, "loss": 0.8361, "step": 140350 }, { "epoch": 2.464228655699714, "grad_norm": 0.03929949098560273, "learning_rate": 0.00010644259081048493, "loss": 0.8459, "step": 140360 }, { "epoch": 2.4644042205797154, "grad_norm": 0.04718904480172594, "learning_rate": 0.00010643146399596171, "loss": 0.8485, "step": 140370 }, { "epoch": 2.4645797854597165, "grad_norm": 0.06753123780624587, "learning_rate": 0.00010642033711262083, "loss": 0.8463, "step": 140380 }, { "epoch": 2.464755350339718, "grad_norm": 0.05679989376863633, "learning_rate": 0.00010640921016060321, "loss": 0.8397, "step": 140390 }, { "epoch": 2.4649309152197194, "grad_norm": 0.05774899346995416, "learning_rate": 0.00010639808314004989, "loss": 0.8348, "step": 140400 }, { "epoch": 2.465106480099721, "grad_norm": 0.057653930387040975, "learning_rate": 0.0001063869560511018, "loss": 0.8455, "step": 140410 }, { "epoch": 2.4652820449797224, "grad_norm": 0.060708465117583224, "learning_rate": 0.00010637582889389997, "loss": 0.849, "step": 140420 }, { "epoch": 2.465457609859724, "grad_norm": 0.06102384998283003, "learning_rate": 0.00010636470166858534, "loss": 0.8351, "step": 140430 }, { "epoch": 2.465633174739725, "grad_norm": 0.07209260420656376, "learning_rate": 0.00010635357437529891, "loss": 0.8386, "step": 140440 }, { "epoch": 2.4658087396197264, "grad_norm": 0.05268788964748818, "learning_rate": 0.00010634244701418166, "loss": 0.8445, "step": 140450 }, { "epoch": 2.465984304499728, "grad_norm": 0.05736672720473146, "learning_rate": 0.00010633131958537458, "loss": 0.8414, "step": 140460 }, { "epoch": 2.4661598693797293, "grad_norm": 0.04747307448489311, "learning_rate": 0.0001063201920890187, "loss": 0.8511, "step": 140470 }, { "epoch": 2.466335434259731, "grad_norm": 0.05398558295394328, "learning_rate": 0.00010630906452525493, "loss": 0.8415, "step": 140480 }, { "epoch": 2.4665109991397323, "grad_norm": 0.06298292879931912, "learning_rate": 0.00010629793689422429, "loss": 0.8489, "step": 140490 }, { "epoch": 2.4666865640197333, "grad_norm": 0.06067057305070187, "learning_rate": 0.0001062868091960678, "loss": 0.8488, "step": 140500 }, { "epoch": 2.4668621288997348, "grad_norm": 0.05360498779794087, "learning_rate": 0.00010627568143092638, "loss": 0.8486, "step": 140510 }, { "epoch": 2.4670376937797363, "grad_norm": 0.04867349810354595, "learning_rate": 0.00010626455359894111, "loss": 0.8436, "step": 140520 }, { "epoch": 2.4672132586597377, "grad_norm": 0.06381058610771635, "learning_rate": 0.00010625342570025294, "loss": 0.8513, "step": 140530 }, { "epoch": 2.467388823539739, "grad_norm": 0.055628125595396545, "learning_rate": 0.00010624229773500285, "loss": 0.8488, "step": 140540 }, { "epoch": 2.4675643884197407, "grad_norm": 0.04716913633532676, "learning_rate": 0.00010623116970333182, "loss": 0.8488, "step": 140550 }, { "epoch": 2.467739953299742, "grad_norm": 0.06320933744723864, "learning_rate": 0.00010622004160538087, "loss": 0.8522, "step": 140560 }, { "epoch": 2.467915518179743, "grad_norm": 0.045697785736836555, "learning_rate": 0.000106208913441291, "loss": 0.8417, "step": 140570 }, { "epoch": 2.4680910830597447, "grad_norm": 0.04705346592468497, "learning_rate": 0.0001061977852112032, "loss": 0.838, "step": 140580 }, { "epoch": 2.468266647939746, "grad_norm": 0.05529540547491532, "learning_rate": 0.00010618665691525851, "loss": 0.8418, "step": 140590 }, { "epoch": 2.4684422128197476, "grad_norm": 0.06581325951316432, "learning_rate": 0.00010617552855359783, "loss": 0.8384, "step": 140600 }, { "epoch": 2.468617777699749, "grad_norm": 0.054684438943840355, "learning_rate": 0.00010616440012636221, "loss": 0.8449, "step": 140610 }, { "epoch": 2.46879334257975, "grad_norm": 0.08115973180295742, "learning_rate": 0.00010615327163369267, "loss": 0.8439, "step": 140620 }, { "epoch": 2.4689689074597516, "grad_norm": 0.061246944943384306, "learning_rate": 0.00010614214307573018, "loss": 0.8409, "step": 140630 }, { "epoch": 2.469144472339753, "grad_norm": 0.07187081484673698, "learning_rate": 0.00010613101445261577, "loss": 0.845, "step": 140640 }, { "epoch": 2.4693200372197546, "grad_norm": 0.05275786540244002, "learning_rate": 0.0001061198857644904, "loss": 0.8425, "step": 140650 }, { "epoch": 2.469495602099756, "grad_norm": 0.0582187346093273, "learning_rate": 0.00010610875701149512, "loss": 0.8487, "step": 140660 }, { "epoch": 2.4696711669797575, "grad_norm": 0.10384059066644377, "learning_rate": 0.00010609762819377091, "loss": 0.839, "step": 140670 }, { "epoch": 2.469846731859759, "grad_norm": 0.049092745960341404, "learning_rate": 0.00010608649931145873, "loss": 0.8462, "step": 140680 }, { "epoch": 2.47002229673976, "grad_norm": 0.0461885101880256, "learning_rate": 0.00010607537036469967, "loss": 0.8449, "step": 140690 }, { "epoch": 2.4701978616197615, "grad_norm": 0.0671182150078732, "learning_rate": 0.0001060642413536347, "loss": 0.8389, "step": 140700 }, { "epoch": 2.470373426499763, "grad_norm": 0.056659454820161055, "learning_rate": 0.00010605311227840478, "loss": 0.843, "step": 140710 }, { "epoch": 2.4705489913797645, "grad_norm": 0.04209201533564868, "learning_rate": 0.00010604198313915102, "loss": 0.8356, "step": 140720 }, { "epoch": 2.470724556259766, "grad_norm": 0.05953341310483423, "learning_rate": 0.00010603085393601428, "loss": 0.8449, "step": 140730 }, { "epoch": 2.470900121139767, "grad_norm": 0.06480318883827578, "learning_rate": 0.00010601972466913572, "loss": 0.8397, "step": 140740 }, { "epoch": 2.4710756860197685, "grad_norm": 0.052237210709336644, "learning_rate": 0.00010600859533865628, "loss": 0.8442, "step": 140750 }, { "epoch": 2.47125125089977, "grad_norm": 0.05479178833347863, "learning_rate": 0.00010599746594471698, "loss": 0.8387, "step": 140760 }, { "epoch": 2.4714268157797714, "grad_norm": 0.05997774240027584, "learning_rate": 0.00010598633648745884, "loss": 0.8503, "step": 140770 }, { "epoch": 2.471602380659773, "grad_norm": 0.06467788365725892, "learning_rate": 0.00010597520696702281, "loss": 0.8477, "step": 140780 }, { "epoch": 2.4717779455397744, "grad_norm": 0.05573733461502403, "learning_rate": 0.00010596407738354998, "loss": 0.8419, "step": 140790 }, { "epoch": 2.471953510419776, "grad_norm": 0.05514512088645426, "learning_rate": 0.00010595294773718134, "loss": 0.8466, "step": 140800 }, { "epoch": 2.472129075299777, "grad_norm": 0.05504376676220845, "learning_rate": 0.00010594181802805794, "loss": 0.8388, "step": 140810 }, { "epoch": 2.4723046401797784, "grad_norm": 0.04338916309511015, "learning_rate": 0.00010593068825632071, "loss": 0.8396, "step": 140820 }, { "epoch": 2.47248020505978, "grad_norm": 0.07260980626060104, "learning_rate": 0.00010591955842211072, "loss": 0.8493, "step": 140830 }, { "epoch": 2.4726557699397813, "grad_norm": 0.06604089493741674, "learning_rate": 0.000105908428525569, "loss": 0.8462, "step": 140840 }, { "epoch": 2.472831334819783, "grad_norm": 0.04499487087626009, "learning_rate": 0.00010589729856683652, "loss": 0.8488, "step": 140850 }, { "epoch": 2.473006899699784, "grad_norm": 0.053754727000872, "learning_rate": 0.00010588616854605437, "loss": 0.8415, "step": 140860 }, { "epoch": 2.4731824645797853, "grad_norm": 0.065711081364765, "learning_rate": 0.00010587503846336353, "loss": 0.8436, "step": 140870 }, { "epoch": 2.4733580294597868, "grad_norm": 0.07720108047035099, "learning_rate": 0.00010586390831890498, "loss": 0.8484, "step": 140880 }, { "epoch": 2.4735335943397883, "grad_norm": 0.0846952594508885, "learning_rate": 0.0001058527781128198, "loss": 0.8451, "step": 140890 }, { "epoch": 2.4737091592197897, "grad_norm": 0.06282758156617851, "learning_rate": 0.00010584164784524897, "loss": 0.8444, "step": 140900 }, { "epoch": 2.473884724099791, "grad_norm": 0.06111602007508314, "learning_rate": 0.00010583051751633355, "loss": 0.8507, "step": 140910 }, { "epoch": 2.4740602889797927, "grad_norm": 0.06329279421229739, "learning_rate": 0.00010581938712621456, "loss": 0.8522, "step": 140920 }, { "epoch": 2.4742358538597937, "grad_norm": 0.057021633765020965, "learning_rate": 0.00010580825667503301, "loss": 0.8442, "step": 140930 }, { "epoch": 2.474411418739795, "grad_norm": 0.05589839633214291, "learning_rate": 0.00010579712616292992, "loss": 0.8436, "step": 140940 }, { "epoch": 2.4745869836197967, "grad_norm": 0.06467841074617604, "learning_rate": 0.0001057859955900463, "loss": 0.8425, "step": 140950 }, { "epoch": 2.474762548499798, "grad_norm": 0.05650440382613081, "learning_rate": 0.0001057748649565232, "loss": 0.8409, "step": 140960 }, { "epoch": 2.4749381133797996, "grad_norm": 0.06435604600945766, "learning_rate": 0.00010576373426250168, "loss": 0.8414, "step": 140970 }, { "epoch": 2.4751136782598007, "grad_norm": 0.0556899292667784, "learning_rate": 0.00010575260350812271, "loss": 0.8381, "step": 140980 }, { "epoch": 2.475289243139802, "grad_norm": 0.05651885265866803, "learning_rate": 0.00010574147269352736, "loss": 0.8462, "step": 140990 }, { "epoch": 2.4754648080198036, "grad_norm": 0.05427047039667237, "learning_rate": 0.00010573034181885663, "loss": 0.8417, "step": 141000 }, { "epoch": 2.475640372899805, "grad_norm": 0.059441529982651814, "learning_rate": 0.00010571921088425158, "loss": 0.8536, "step": 141010 }, { "epoch": 2.4758159377798066, "grad_norm": 0.048262839928189256, "learning_rate": 0.00010570807988985321, "loss": 0.8447, "step": 141020 }, { "epoch": 2.475991502659808, "grad_norm": 0.07994261724134998, "learning_rate": 0.00010569694883580257, "loss": 0.8441, "step": 141030 }, { "epoch": 2.4761670675398095, "grad_norm": 0.04298441126560902, "learning_rate": 0.00010568581772224068, "loss": 0.8437, "step": 141040 }, { "epoch": 2.4763426324198106, "grad_norm": 0.05987272350899147, "learning_rate": 0.00010567468654930857, "loss": 0.849, "step": 141050 }, { "epoch": 2.476518197299812, "grad_norm": 0.061944943996116414, "learning_rate": 0.0001056635553171473, "loss": 0.8512, "step": 141060 }, { "epoch": 2.4766937621798135, "grad_norm": 0.054952410326743556, "learning_rate": 0.00010565242402589787, "loss": 0.8312, "step": 141070 }, { "epoch": 2.476869327059815, "grad_norm": 0.056759187413332704, "learning_rate": 0.00010564129267570136, "loss": 0.8505, "step": 141080 }, { "epoch": 2.4770448919398165, "grad_norm": 0.07770775299613315, "learning_rate": 0.00010563016126669878, "loss": 0.8462, "step": 141090 }, { "epoch": 2.477220456819818, "grad_norm": 0.049751009824943945, "learning_rate": 0.00010561902979903118, "loss": 0.8413, "step": 141100 }, { "epoch": 2.477396021699819, "grad_norm": 0.043105301079509224, "learning_rate": 0.00010560789827283957, "loss": 0.8377, "step": 141110 }, { "epoch": 2.4775715865798205, "grad_norm": 0.0467546724067557, "learning_rate": 0.000105596766688265, "loss": 0.8399, "step": 141120 }, { "epoch": 2.477747151459822, "grad_norm": 0.05514519582483944, "learning_rate": 0.00010558563504544852, "loss": 0.847, "step": 141130 }, { "epoch": 2.4779227163398234, "grad_norm": 0.06830067806174461, "learning_rate": 0.00010557450334453118, "loss": 0.8504, "step": 141140 }, { "epoch": 2.478098281219825, "grad_norm": 0.061114882959954674, "learning_rate": 0.00010556337158565399, "loss": 0.8412, "step": 141150 }, { "epoch": 2.4782738460998264, "grad_norm": 0.053193481611393784, "learning_rate": 0.00010555223976895803, "loss": 0.848, "step": 141160 }, { "epoch": 2.4784494109798274, "grad_norm": 0.04315212452047698, "learning_rate": 0.0001055411078945843, "loss": 0.8511, "step": 141170 }, { "epoch": 2.478624975859829, "grad_norm": 0.04978606686372905, "learning_rate": 0.00010552997596267386, "loss": 0.8476, "step": 141180 }, { "epoch": 2.4788005407398304, "grad_norm": 0.0515367665537428, "learning_rate": 0.00010551884397336777, "loss": 0.845, "step": 141190 }, { "epoch": 2.478976105619832, "grad_norm": 0.05362264688854677, "learning_rate": 0.00010550771192680703, "loss": 0.8497, "step": 141200 }, { "epoch": 2.4791516704998333, "grad_norm": 0.05370879271061718, "learning_rate": 0.00010549657982313277, "loss": 0.8455, "step": 141210 }, { "epoch": 2.479327235379835, "grad_norm": 0.05048036647910976, "learning_rate": 0.00010548544766248596, "loss": 0.8435, "step": 141220 }, { "epoch": 2.4795028002598363, "grad_norm": 0.04682822215102697, "learning_rate": 0.00010547431544500763, "loss": 0.8406, "step": 141230 }, { "epoch": 2.4796783651398373, "grad_norm": 0.07127245120496307, "learning_rate": 0.00010546318317083889, "loss": 0.8385, "step": 141240 }, { "epoch": 2.479853930019839, "grad_norm": 0.053332583429102985, "learning_rate": 0.00010545205084012078, "loss": 0.8346, "step": 141250 }, { "epoch": 2.4800294948998403, "grad_norm": 0.06590022288037752, "learning_rate": 0.0001054409184529943, "loss": 0.8404, "step": 141260 }, { "epoch": 2.4802050597798417, "grad_norm": 0.05448751764938702, "learning_rate": 0.00010542978600960058, "loss": 0.8429, "step": 141270 }, { "epoch": 2.480380624659843, "grad_norm": 0.054737463745841715, "learning_rate": 0.0001054186535100806, "loss": 0.8437, "step": 141280 }, { "epoch": 2.4805561895398442, "grad_norm": 0.05178535694330406, "learning_rate": 0.00010540752095457542, "loss": 0.8446, "step": 141290 }, { "epoch": 2.4807317544198457, "grad_norm": 0.04882629512268577, "learning_rate": 0.00010539638834322611, "loss": 0.8346, "step": 141300 }, { "epoch": 2.480907319299847, "grad_norm": 0.049036567381972956, "learning_rate": 0.00010538525567617372, "loss": 0.8392, "step": 141310 }, { "epoch": 2.4810828841798487, "grad_norm": 0.04670606965632068, "learning_rate": 0.00010537412295355933, "loss": 0.8472, "step": 141320 }, { "epoch": 2.48125844905985, "grad_norm": 0.05167211926943529, "learning_rate": 0.00010536299017552393, "loss": 0.8407, "step": 141330 }, { "epoch": 2.4814340139398516, "grad_norm": 0.04943696673748868, "learning_rate": 0.00010535185734220861, "loss": 0.8429, "step": 141340 }, { "epoch": 2.481609578819853, "grad_norm": 0.06851660649862971, "learning_rate": 0.00010534072445375442, "loss": 0.8487, "step": 141350 }, { "epoch": 2.481785143699854, "grad_norm": 0.058449668045541456, "learning_rate": 0.00010532959151030244, "loss": 0.8452, "step": 141360 }, { "epoch": 2.4819607085798556, "grad_norm": 0.048919557138296925, "learning_rate": 0.00010531845851199367, "loss": 0.8408, "step": 141370 }, { "epoch": 2.482136273459857, "grad_norm": 0.07144592670136457, "learning_rate": 0.00010530732545896926, "loss": 0.847, "step": 141380 }, { "epoch": 2.4823118383398586, "grad_norm": 0.058942689103166794, "learning_rate": 0.00010529619235137013, "loss": 0.8412, "step": 141390 }, { "epoch": 2.48248740321986, "grad_norm": 0.05648432069405592, "learning_rate": 0.0001052850591893375, "loss": 0.8443, "step": 141400 }, { "epoch": 2.482662968099861, "grad_norm": 0.05877827191470621, "learning_rate": 0.00010527392597301229, "loss": 0.8421, "step": 141410 }, { "epoch": 2.4828385329798626, "grad_norm": 0.07137333275941961, "learning_rate": 0.00010526279270253563, "loss": 0.8471, "step": 141420 }, { "epoch": 2.483014097859864, "grad_norm": 0.053825859202638854, "learning_rate": 0.0001052516593780486, "loss": 0.8395, "step": 141430 }, { "epoch": 2.4831896627398655, "grad_norm": 0.05977659088501342, "learning_rate": 0.00010524052599969221, "loss": 0.8378, "step": 141440 }, { "epoch": 2.483365227619867, "grad_norm": 0.048555321826256215, "learning_rate": 0.00010522939256760758, "loss": 0.8465, "step": 141450 }, { "epoch": 2.4835407924998685, "grad_norm": 0.058678793088739134, "learning_rate": 0.00010521825908193568, "loss": 0.8495, "step": 141460 }, { "epoch": 2.48371635737987, "grad_norm": 0.07031408733843651, "learning_rate": 0.00010520712554281765, "loss": 0.8538, "step": 141470 }, { "epoch": 2.483891922259871, "grad_norm": 0.05297499022720643, "learning_rate": 0.00010519599195039453, "loss": 0.8393, "step": 141480 }, { "epoch": 2.4840674871398725, "grad_norm": 0.061894619964784, "learning_rate": 0.0001051848583048074, "loss": 0.8454, "step": 141490 }, { "epoch": 2.484243052019874, "grad_norm": 0.1058851132553602, "learning_rate": 0.0001051737246061973, "loss": 0.8397, "step": 141500 }, { "epoch": 2.4844186168998754, "grad_norm": 0.05070932117733203, "learning_rate": 0.00010516259085470533, "loss": 0.8395, "step": 141510 }, { "epoch": 2.484594181779877, "grad_norm": 0.05252368029975073, "learning_rate": 0.00010515145705047254, "loss": 0.8384, "step": 141520 }, { "epoch": 2.484769746659878, "grad_norm": 0.04977653668167214, "learning_rate": 0.00010514032319363993, "loss": 0.8486, "step": 141530 }, { "epoch": 2.4849453115398794, "grad_norm": 0.05886931517272005, "learning_rate": 0.00010512918928434867, "loss": 0.8478, "step": 141540 }, { "epoch": 2.485120876419881, "grad_norm": 0.058903552687155455, "learning_rate": 0.00010511805532273984, "loss": 0.8442, "step": 141550 }, { "epoch": 2.4852964412998824, "grad_norm": 0.051702075964993995, "learning_rate": 0.00010510692130895439, "loss": 0.8498, "step": 141560 }, { "epoch": 2.485472006179884, "grad_norm": 0.061092203925222764, "learning_rate": 0.0001050957872431335, "loss": 0.8435, "step": 141570 }, { "epoch": 2.4856475710598853, "grad_norm": 0.056700786199733164, "learning_rate": 0.00010508465312541818, "loss": 0.8431, "step": 141580 }, { "epoch": 2.485823135939887, "grad_norm": 0.07285358608374154, "learning_rate": 0.00010507351895594952, "loss": 0.844, "step": 141590 }, { "epoch": 2.485998700819888, "grad_norm": 0.04829673219344612, "learning_rate": 0.00010506238473486858, "loss": 0.842, "step": 141600 }, { "epoch": 2.4861742656998893, "grad_norm": 0.05536771182411283, "learning_rate": 0.0001050512504623165, "loss": 0.8481, "step": 141610 }, { "epoch": 2.486349830579891, "grad_norm": 0.07831034003997578, "learning_rate": 0.00010504011613843428, "loss": 0.8443, "step": 141620 }, { "epoch": 2.4865253954598923, "grad_norm": 0.06102218312137498, "learning_rate": 0.00010502898176336297, "loss": 0.8464, "step": 141630 }, { "epoch": 2.4867009603398937, "grad_norm": 0.04953658750584508, "learning_rate": 0.00010501784733724373, "loss": 0.8529, "step": 141640 }, { "epoch": 2.4868765252198948, "grad_norm": 0.05240776350286182, "learning_rate": 0.0001050067128602176, "loss": 0.8476, "step": 141650 }, { "epoch": 2.4870520900998963, "grad_norm": 0.05163337646103805, "learning_rate": 0.00010499557833242564, "loss": 0.8472, "step": 141660 }, { "epoch": 2.4872276549798977, "grad_norm": 0.05749833432983265, "learning_rate": 0.00010498444375400893, "loss": 0.8506, "step": 141670 }, { "epoch": 2.487403219859899, "grad_norm": 0.051480024393017104, "learning_rate": 0.00010497330912510857, "loss": 0.8451, "step": 141680 }, { "epoch": 2.4875787847399007, "grad_norm": 0.06899398663785598, "learning_rate": 0.00010496217444586562, "loss": 0.8417, "step": 141690 }, { "epoch": 2.487754349619902, "grad_norm": 0.057326765551918804, "learning_rate": 0.00010495103971642113, "loss": 0.843, "step": 141700 }, { "epoch": 2.4879299144999036, "grad_norm": 0.07227280375462652, "learning_rate": 0.00010493990493691626, "loss": 0.8496, "step": 141710 }, { "epoch": 2.4881054793799047, "grad_norm": 0.053331212678263164, "learning_rate": 0.00010492877010749199, "loss": 0.8422, "step": 141720 }, { "epoch": 2.488281044259906, "grad_norm": 0.04690911920317232, "learning_rate": 0.00010491763522828948, "loss": 0.8473, "step": 141730 }, { "epoch": 2.4884566091399076, "grad_norm": 0.05467108831596971, "learning_rate": 0.00010490650029944975, "loss": 0.8449, "step": 141740 }, { "epoch": 2.488632174019909, "grad_norm": 0.046484146120907004, "learning_rate": 0.00010489536532111393, "loss": 0.8456, "step": 141750 }, { "epoch": 2.4888077388999106, "grad_norm": 0.05034361195285806, "learning_rate": 0.00010488423029342306, "loss": 0.8394, "step": 141760 }, { "epoch": 2.488983303779912, "grad_norm": 0.057409100848781994, "learning_rate": 0.00010487309521651829, "loss": 0.8379, "step": 141770 }, { "epoch": 2.489158868659913, "grad_norm": 0.0508968444999565, "learning_rate": 0.00010486196009054068, "loss": 0.8476, "step": 141780 }, { "epoch": 2.4893344335399146, "grad_norm": 0.0742680209460331, "learning_rate": 0.00010485082491563127, "loss": 0.846, "step": 141790 }, { "epoch": 2.489509998419916, "grad_norm": 0.1304869517551756, "learning_rate": 0.00010483968969193115, "loss": 0.847, "step": 141800 }, { "epoch": 2.4896855632999175, "grad_norm": 0.09357684290269677, "learning_rate": 0.00010482855441958143, "loss": 0.8425, "step": 141810 }, { "epoch": 2.489861128179919, "grad_norm": 0.044414598500096596, "learning_rate": 0.0001048174190987232, "loss": 0.8394, "step": 141820 }, { "epoch": 2.4900366930599205, "grad_norm": 0.06088374474657002, "learning_rate": 0.00010480628372949755, "loss": 0.8491, "step": 141830 }, { "epoch": 2.4902122579399215, "grad_norm": 0.06421947097970253, "learning_rate": 0.00010479514831204555, "loss": 0.8406, "step": 141840 }, { "epoch": 2.490387822819923, "grad_norm": 0.057555405683508296, "learning_rate": 0.00010478401284650827, "loss": 0.8409, "step": 141850 }, { "epoch": 2.4905633876999245, "grad_norm": 0.056023966110684165, "learning_rate": 0.00010477287733302686, "loss": 0.839, "step": 141860 }, { "epoch": 2.490738952579926, "grad_norm": 0.06268569362521197, "learning_rate": 0.00010476174177174232, "loss": 0.8451, "step": 141870 }, { "epoch": 2.4909145174599274, "grad_norm": 0.04949981440752606, "learning_rate": 0.00010475060616279585, "loss": 0.8474, "step": 141880 }, { "epoch": 2.491090082339929, "grad_norm": 0.07426592546754071, "learning_rate": 0.00010473947050632847, "loss": 0.8467, "step": 141890 }, { "epoch": 2.49126564721993, "grad_norm": 0.05736928363324326, "learning_rate": 0.00010472833480248125, "loss": 0.8461, "step": 141900 }, { "epoch": 2.4914412120999314, "grad_norm": 0.05464208418813337, "learning_rate": 0.00010471719905139534, "loss": 0.8472, "step": 141910 }, { "epoch": 2.491616776979933, "grad_norm": 0.05309799711449186, "learning_rate": 0.00010470606325321177, "loss": 0.843, "step": 141920 }, { "epoch": 2.4917923418599344, "grad_norm": 0.05290438307295485, "learning_rate": 0.00010469492740807167, "loss": 0.8453, "step": 141930 }, { "epoch": 2.491967906739936, "grad_norm": 0.05835679355655987, "learning_rate": 0.00010468379151611616, "loss": 0.8439, "step": 141940 }, { "epoch": 2.4921434716199373, "grad_norm": 0.058849418620705383, "learning_rate": 0.00010467265557748632, "loss": 0.8538, "step": 141950 }, { "epoch": 2.4923190364999384, "grad_norm": 0.07309043041961152, "learning_rate": 0.00010466151959232322, "loss": 0.839, "step": 141960 }, { "epoch": 2.49249460137994, "grad_norm": 0.041047442527093234, "learning_rate": 0.00010465038356076795, "loss": 0.8457, "step": 141970 }, { "epoch": 2.4926701662599413, "grad_norm": 0.058291872669000946, "learning_rate": 0.00010463924748296161, "loss": 0.8461, "step": 141980 }, { "epoch": 2.492845731139943, "grad_norm": 0.06546787291502906, "learning_rate": 0.0001046281113590453, "loss": 0.8435, "step": 141990 }, { "epoch": 2.4930212960199443, "grad_norm": 0.05267869889204819, "learning_rate": 0.00010461697518916018, "loss": 0.8368, "step": 142000 }, { "epoch": 2.4931968608999457, "grad_norm": 0.05474825289738824, "learning_rate": 0.00010460583897344727, "loss": 0.8501, "step": 142010 }, { "epoch": 2.493372425779947, "grad_norm": 0.04925234422680641, "learning_rate": 0.00010459470271204766, "loss": 0.8531, "step": 142020 }, { "epoch": 2.4935479906599483, "grad_norm": 0.04651521177890632, "learning_rate": 0.00010458356640510252, "loss": 0.8457, "step": 142030 }, { "epoch": 2.4937235555399497, "grad_norm": 0.05699343007551018, "learning_rate": 0.00010457243005275285, "loss": 0.8399, "step": 142040 }, { "epoch": 2.493899120419951, "grad_norm": 0.06505259454110476, "learning_rate": 0.00010456129365513984, "loss": 0.84, "step": 142050 }, { "epoch": 2.4940746852999527, "grad_norm": 0.05673080852467737, "learning_rate": 0.00010455015721240456, "loss": 0.8378, "step": 142060 }, { "epoch": 2.494250250179954, "grad_norm": 0.052708267239213805, "learning_rate": 0.00010453902072468809, "loss": 0.8412, "step": 142070 }, { "epoch": 2.494425815059955, "grad_norm": 0.060724540356128384, "learning_rate": 0.00010452788419213158, "loss": 0.8454, "step": 142080 }, { "epoch": 2.4946013799399567, "grad_norm": 0.07733627610369524, "learning_rate": 0.00010451674761487607, "loss": 0.8378, "step": 142090 }, { "epoch": 2.494776944819958, "grad_norm": 0.0645278853624601, "learning_rate": 0.00010450561099306268, "loss": 0.8496, "step": 142100 }, { "epoch": 2.4949525096999596, "grad_norm": 0.05421089871574251, "learning_rate": 0.00010449447432683256, "loss": 0.8451, "step": 142110 }, { "epoch": 2.495128074579961, "grad_norm": 0.05614927869641993, "learning_rate": 0.0001044833376163268, "loss": 0.8399, "step": 142120 }, { "epoch": 2.4953036394599626, "grad_norm": 0.06364964037771509, "learning_rate": 0.00010447220086168645, "loss": 0.8424, "step": 142130 }, { "epoch": 2.495479204339964, "grad_norm": 0.06680214731601943, "learning_rate": 0.00010446106406305265, "loss": 0.8474, "step": 142140 }, { "epoch": 2.495654769219965, "grad_norm": 0.04942739663474548, "learning_rate": 0.00010444992722056652, "loss": 0.8452, "step": 142150 }, { "epoch": 2.4958303340999666, "grad_norm": 0.05675780846077037, "learning_rate": 0.00010443879033436914, "loss": 0.8514, "step": 142160 }, { "epoch": 2.496005898979968, "grad_norm": 0.06864216851061107, "learning_rate": 0.00010442765340460163, "loss": 0.8456, "step": 142170 }, { "epoch": 2.4961814638599695, "grad_norm": 0.050860233743889025, "learning_rate": 0.00010441651643140511, "loss": 0.8395, "step": 142180 }, { "epoch": 2.496357028739971, "grad_norm": 0.06389000823511705, "learning_rate": 0.00010440537941492064, "loss": 0.8506, "step": 142190 }, { "epoch": 2.496532593619972, "grad_norm": 0.05647017971916099, "learning_rate": 0.00010439424235528938, "loss": 0.8432, "step": 142200 }, { "epoch": 2.4967081584999735, "grad_norm": 0.05111701388427384, "learning_rate": 0.00010438310525265243, "loss": 0.8433, "step": 142210 }, { "epoch": 2.496883723379975, "grad_norm": 0.053502953813700814, "learning_rate": 0.00010437196810715086, "loss": 0.8463, "step": 142220 }, { "epoch": 2.4970592882599765, "grad_norm": 0.05337880583213217, "learning_rate": 0.00010436083091892585, "loss": 0.8494, "step": 142230 }, { "epoch": 2.497234853139978, "grad_norm": 0.04092303648713773, "learning_rate": 0.00010434969368811842, "loss": 0.8492, "step": 142240 }, { "epoch": 2.4974104180199794, "grad_norm": 0.045957268689319125, "learning_rate": 0.00010433855641486976, "loss": 0.8394, "step": 142250 }, { "epoch": 2.497585982899981, "grad_norm": 0.04907324937328052, "learning_rate": 0.00010432741909932093, "loss": 0.8441, "step": 142260 }, { "epoch": 2.497761547779982, "grad_norm": 0.04700952308354452, "learning_rate": 0.00010431628174161305, "loss": 0.842, "step": 142270 }, { "epoch": 2.4979371126599834, "grad_norm": 0.08220551903276717, "learning_rate": 0.00010430514434188728, "loss": 0.84, "step": 142280 }, { "epoch": 2.498112677539985, "grad_norm": 0.08191950755686028, "learning_rate": 0.00010429400690028468, "loss": 0.84, "step": 142290 }, { "epoch": 2.4982882424199864, "grad_norm": 0.052410491442479196, "learning_rate": 0.00010428286941694642, "loss": 0.8431, "step": 142300 }, { "epoch": 2.498463807299988, "grad_norm": 0.06969151232710846, "learning_rate": 0.00010427173189201354, "loss": 0.8376, "step": 142310 }, { "epoch": 2.498639372179989, "grad_norm": 0.06032139152171676, "learning_rate": 0.00010426059432562716, "loss": 0.8494, "step": 142320 }, { "epoch": 2.4988149370599904, "grad_norm": 0.050807264956958885, "learning_rate": 0.00010424945671792848, "loss": 0.835, "step": 142330 }, { "epoch": 2.498990501939992, "grad_norm": 0.051323001522319306, "learning_rate": 0.00010423831906905851, "loss": 0.8405, "step": 142340 }, { "epoch": 2.4991660668199933, "grad_norm": 0.060196482442892454, "learning_rate": 0.00010422718137915848, "loss": 0.8389, "step": 142350 }, { "epoch": 2.499341631699995, "grad_norm": 0.060034011378967314, "learning_rate": 0.00010421604364836939, "loss": 0.8451, "step": 142360 }, { "epoch": 2.4995171965799963, "grad_norm": 0.05381261118122093, "learning_rate": 0.00010420490587683244, "loss": 0.8424, "step": 142370 }, { "epoch": 2.4996927614599977, "grad_norm": 0.06091959945379747, "learning_rate": 0.00010419376806468868, "loss": 0.8421, "step": 142380 }, { "epoch": 2.499868326339999, "grad_norm": 0.05239617074176352, "learning_rate": 0.0001041826302120793, "loss": 0.8405, "step": 142390 }, { "epoch": 2.5000438912200003, "grad_norm": 0.055557674307425864, "learning_rate": 0.0001041714923191454, "loss": 0.8474, "step": 142400 }, { "epoch": 2.5002194561000017, "grad_norm": 0.0482929592092805, "learning_rate": 0.00010416035438602804, "loss": 0.8502, "step": 142410 }, { "epoch": 2.500395020980003, "grad_norm": 0.05050958514265602, "learning_rate": 0.00010414921641286841, "loss": 0.8463, "step": 142420 }, { "epoch": 2.5005705858600047, "grad_norm": 0.048200336744174906, "learning_rate": 0.0001041380783998076, "loss": 0.8446, "step": 142430 }, { "epoch": 2.5007461507400057, "grad_norm": 0.058710630735546866, "learning_rate": 0.00010412694034698673, "loss": 0.8425, "step": 142440 }, { "epoch": 2.5009217156200076, "grad_norm": 0.058655749321967945, "learning_rate": 0.00010411580225454694, "loss": 0.8547, "step": 142450 }, { "epoch": 2.5010972805000087, "grad_norm": 0.067835974452207, "learning_rate": 0.00010410466412262933, "loss": 0.8429, "step": 142460 }, { "epoch": 2.50127284538001, "grad_norm": 0.049072308401928146, "learning_rate": 0.00010409352595137507, "loss": 0.8511, "step": 142470 }, { "epoch": 2.5014484102600116, "grad_norm": 0.048639282001247924, "learning_rate": 0.00010408238774092518, "loss": 0.8536, "step": 142480 }, { "epoch": 2.501623975140013, "grad_norm": 0.05043195079806146, "learning_rate": 0.00010407124949142086, "loss": 0.8485, "step": 142490 }, { "epoch": 2.5017995400200146, "grad_norm": 0.06178616927742885, "learning_rate": 0.00010406011120300326, "loss": 0.8524, "step": 142500 }, { "epoch": 2.5019751049000156, "grad_norm": 0.06873577295154092, "learning_rate": 0.00010404897287581343, "loss": 0.8448, "step": 142510 }, { "epoch": 2.502150669780017, "grad_norm": 0.0630658956141933, "learning_rate": 0.00010403783450999254, "loss": 0.8454, "step": 142520 }, { "epoch": 2.5023262346600186, "grad_norm": 0.05107508657042763, "learning_rate": 0.00010402669610568169, "loss": 0.8373, "step": 142530 }, { "epoch": 2.50250179954002, "grad_norm": 0.050393969471772326, "learning_rate": 0.00010401555766302204, "loss": 0.8428, "step": 142540 }, { "epoch": 2.5026773644200215, "grad_norm": 0.05525439178992741, "learning_rate": 0.00010400441918215469, "loss": 0.8347, "step": 142550 }, { "epoch": 2.5028529293000226, "grad_norm": 0.05034115191450633, "learning_rate": 0.00010399328066322078, "loss": 0.8455, "step": 142560 }, { "epoch": 2.5030284941800245, "grad_norm": 0.057166155164746754, "learning_rate": 0.00010398214210636141, "loss": 0.8475, "step": 142570 }, { "epoch": 2.5032040590600255, "grad_norm": 0.04144004981974555, "learning_rate": 0.00010397100351171773, "loss": 0.8505, "step": 142580 }, { "epoch": 2.503379623940027, "grad_norm": 0.06444361298871397, "learning_rate": 0.0001039598648794309, "loss": 0.8462, "step": 142590 }, { "epoch": 2.5035551888200285, "grad_norm": 0.055629641149774194, "learning_rate": 0.00010394872620964196, "loss": 0.8494, "step": 142600 }, { "epoch": 2.50373075370003, "grad_norm": 0.0657218175800107, "learning_rate": 0.00010393758750249212, "loss": 0.8438, "step": 142610 }, { "epoch": 2.5039063185800314, "grad_norm": 0.058003185508603984, "learning_rate": 0.0001039264487581225, "loss": 0.8365, "step": 142620 }, { "epoch": 2.5040818834600325, "grad_norm": 0.04714232775417791, "learning_rate": 0.00010391530997667421, "loss": 0.8401, "step": 142630 }, { "epoch": 2.504257448340034, "grad_norm": 0.04799103250452571, "learning_rate": 0.00010390417115828839, "loss": 0.8345, "step": 142640 }, { "epoch": 2.5044330132200354, "grad_norm": 0.04788733917392599, "learning_rate": 0.00010389303230310612, "loss": 0.8532, "step": 142650 }, { "epoch": 2.504608578100037, "grad_norm": 0.04465443008171607, "learning_rate": 0.0001038818934112686, "loss": 0.8479, "step": 142660 }, { "epoch": 2.5047841429800384, "grad_norm": 0.06597121954923378, "learning_rate": 0.00010387075448291695, "loss": 0.8442, "step": 142670 }, { "epoch": 2.5049597078600394, "grad_norm": 0.054370616025878726, "learning_rate": 0.0001038596155181923, "loss": 0.8432, "step": 142680 }, { "epoch": 2.5051352727400413, "grad_norm": 0.056732381270401276, "learning_rate": 0.00010384847651723574, "loss": 0.8465, "step": 142690 }, { "epoch": 2.5053108376200424, "grad_norm": 0.054813611673800215, "learning_rate": 0.00010383733748018846, "loss": 0.8397, "step": 142700 }, { "epoch": 2.505486402500044, "grad_norm": 0.05237390028097457, "learning_rate": 0.00010382619840719156, "loss": 0.8467, "step": 142710 }, { "epoch": 2.5056619673800453, "grad_norm": 0.05332096390885668, "learning_rate": 0.0001038150592983862, "loss": 0.8459, "step": 142720 }, { "epoch": 2.505837532260047, "grad_norm": 0.05529552762971052, "learning_rate": 0.0001038039201539135, "loss": 0.8535, "step": 142730 }, { "epoch": 2.5060130971400483, "grad_norm": 0.05174451113559161, "learning_rate": 0.00010379278097391458, "loss": 0.8446, "step": 142740 }, { "epoch": 2.5061886620200493, "grad_norm": 0.05249996742814569, "learning_rate": 0.00010378164175853058, "loss": 0.8452, "step": 142750 }, { "epoch": 2.506364226900051, "grad_norm": 0.06305566057914178, "learning_rate": 0.00010377050250790268, "loss": 0.8477, "step": 142760 }, { "epoch": 2.5065397917800523, "grad_norm": 0.056534575274087694, "learning_rate": 0.00010375936322217192, "loss": 0.844, "step": 142770 }, { "epoch": 2.5067153566600537, "grad_norm": 0.04844733368167266, "learning_rate": 0.00010374822390147956, "loss": 0.8394, "step": 142780 }, { "epoch": 2.506890921540055, "grad_norm": 0.05920005241109342, "learning_rate": 0.00010373708454596665, "loss": 0.8408, "step": 142790 }, { "epoch": 2.5070664864200567, "grad_norm": 0.06286544601064697, "learning_rate": 0.00010372594515577437, "loss": 0.8348, "step": 142800 }, { "epoch": 2.507242051300058, "grad_norm": 0.06218715112525819, "learning_rate": 0.00010371480573104385, "loss": 0.8354, "step": 142810 }, { "epoch": 2.507417616180059, "grad_norm": 0.04384301886611398, "learning_rate": 0.00010370366627191617, "loss": 0.8495, "step": 142820 }, { "epoch": 2.5075931810600607, "grad_norm": 0.05464613872097064, "learning_rate": 0.00010369252677853253, "loss": 0.8461, "step": 142830 }, { "epoch": 2.507768745940062, "grad_norm": 0.050536930753246084, "learning_rate": 0.00010368138725103411, "loss": 0.8438, "step": 142840 }, { "epoch": 2.5079443108200636, "grad_norm": 0.06704454584497295, "learning_rate": 0.00010367024768956199, "loss": 0.8416, "step": 142850 }, { "epoch": 2.508119875700065, "grad_norm": 0.05479580488975961, "learning_rate": 0.00010365910809425729, "loss": 0.8372, "step": 142860 }, { "epoch": 2.508295440580066, "grad_norm": 0.061197145274567, "learning_rate": 0.00010364796846526118, "loss": 0.8465, "step": 142870 }, { "epoch": 2.5084710054600676, "grad_norm": 0.07195592349859847, "learning_rate": 0.00010363682880271486, "loss": 0.8369, "step": 142880 }, { "epoch": 2.508646570340069, "grad_norm": 0.06480400746900229, "learning_rate": 0.00010362568910675934, "loss": 0.8451, "step": 142890 }, { "epoch": 2.5088221352200706, "grad_norm": 0.0552972896075506, "learning_rate": 0.00010361454937753587, "loss": 0.8511, "step": 142900 }, { "epoch": 2.508997700100072, "grad_norm": 0.051880569818987775, "learning_rate": 0.00010360340961518558, "loss": 0.8485, "step": 142910 }, { "epoch": 2.5091732649800735, "grad_norm": 0.04872346645911148, "learning_rate": 0.00010359226981984955, "loss": 0.8448, "step": 142920 }, { "epoch": 2.509348829860075, "grad_norm": 0.053363997554426564, "learning_rate": 0.00010358112999166898, "loss": 0.8441, "step": 142930 }, { "epoch": 2.509524394740076, "grad_norm": 0.05260189444263742, "learning_rate": 0.00010356999013078499, "loss": 0.8464, "step": 142940 }, { "epoch": 2.5096999596200775, "grad_norm": 0.04745895206458539, "learning_rate": 0.0001035588502373387, "loss": 0.8472, "step": 142950 }, { "epoch": 2.509875524500079, "grad_norm": 0.05397405849271323, "learning_rate": 0.00010354771031147137, "loss": 0.8478, "step": 142960 }, { "epoch": 2.5100510893800805, "grad_norm": 0.056935908980208644, "learning_rate": 0.00010353657035332402, "loss": 0.8495, "step": 142970 }, { "epoch": 2.510226654260082, "grad_norm": 0.047590178594552515, "learning_rate": 0.00010352543036303786, "loss": 0.8403, "step": 142980 }, { "epoch": 2.510402219140083, "grad_norm": 0.06080154464338497, "learning_rate": 0.00010351429034075395, "loss": 0.8442, "step": 142990 }, { "epoch": 2.5105777840200845, "grad_norm": 0.07800170021664385, "learning_rate": 0.00010350315028661355, "loss": 0.8478, "step": 143000 }, { "epoch": 2.510753348900086, "grad_norm": 0.05202677419223641, "learning_rate": 0.00010349201020075776, "loss": 0.8394, "step": 143010 }, { "epoch": 2.5109289137800874, "grad_norm": 0.047149188386531975, "learning_rate": 0.0001034808700833277, "loss": 0.8485, "step": 143020 }, { "epoch": 2.511104478660089, "grad_norm": 0.04972712760529804, "learning_rate": 0.00010346972993446458, "loss": 0.8435, "step": 143030 }, { "epoch": 2.5112800435400904, "grad_norm": 0.06667301327830123, "learning_rate": 0.00010345858975430945, "loss": 0.8382, "step": 143040 }, { "epoch": 2.511455608420092, "grad_norm": 0.05446105374794501, "learning_rate": 0.00010344744954300356, "loss": 0.8493, "step": 143050 }, { "epoch": 2.511631173300093, "grad_norm": 0.04810330425553091, "learning_rate": 0.000103436309300688, "loss": 0.8486, "step": 143060 }, { "epoch": 2.5118067381800944, "grad_norm": 0.04639368740059439, "learning_rate": 0.00010342516902750391, "loss": 0.8449, "step": 143070 }, { "epoch": 2.511982303060096, "grad_norm": 0.07705661593047594, "learning_rate": 0.00010341402872359254, "loss": 0.844, "step": 143080 }, { "epoch": 2.5121578679400973, "grad_norm": 0.047487684482404854, "learning_rate": 0.00010340288838909488, "loss": 0.8462, "step": 143090 }, { "epoch": 2.512333432820099, "grad_norm": 0.05002069853802224, "learning_rate": 0.0001033917480241522, "loss": 0.8458, "step": 143100 }, { "epoch": 2.5125089977001, "grad_norm": 0.05818478345926438, "learning_rate": 0.0001033806076289056, "loss": 0.8409, "step": 143110 }, { "epoch": 2.5126845625801013, "grad_norm": 0.05757689159003821, "learning_rate": 0.00010336946720349625, "loss": 0.844, "step": 143120 }, { "epoch": 2.512860127460103, "grad_norm": 0.05057550846153971, "learning_rate": 0.00010335832674806529, "loss": 0.838, "step": 143130 }, { "epoch": 2.5130356923401043, "grad_norm": 0.050480831780058855, "learning_rate": 0.0001033471862627539, "loss": 0.8427, "step": 143140 }, { "epoch": 2.5132112572201057, "grad_norm": 0.050954800323316805, "learning_rate": 0.00010333604574770323, "loss": 0.8499, "step": 143150 }, { "epoch": 2.513386822100107, "grad_norm": 0.053307985974010166, "learning_rate": 0.00010332490520305436, "loss": 0.8434, "step": 143160 }, { "epoch": 2.5135623869801087, "grad_norm": 0.054141337067350435, "learning_rate": 0.00010331376462894853, "loss": 0.8456, "step": 143170 }, { "epoch": 2.5137379518601097, "grad_norm": 0.05742022412315701, "learning_rate": 0.00010330262402552682, "loss": 0.8502, "step": 143180 }, { "epoch": 2.513913516740111, "grad_norm": 0.06033206272016542, "learning_rate": 0.00010329148339293047, "loss": 0.85, "step": 143190 }, { "epoch": 2.5140890816201127, "grad_norm": 0.05197275287615295, "learning_rate": 0.00010328034273130059, "loss": 0.8418, "step": 143200 }, { "epoch": 2.514264646500114, "grad_norm": 0.042280136978048805, "learning_rate": 0.00010326920204077828, "loss": 0.8421, "step": 143210 }, { "epoch": 2.5144402113801156, "grad_norm": 0.06394313887351781, "learning_rate": 0.00010325806132150478, "loss": 0.8494, "step": 143220 }, { "epoch": 2.5146157762601167, "grad_norm": 0.04909531656881485, "learning_rate": 0.00010324692057362118, "loss": 0.853, "step": 143230 }, { "epoch": 2.5147913411401186, "grad_norm": 0.047220374289755514, "learning_rate": 0.0001032357797972687, "loss": 0.8473, "step": 143240 }, { "epoch": 2.5149669060201196, "grad_norm": 0.05023423412730481, "learning_rate": 0.00010322463899258846, "loss": 0.8492, "step": 143250 }, { "epoch": 2.515142470900121, "grad_norm": 0.06281718691099095, "learning_rate": 0.00010321349815972159, "loss": 0.8474, "step": 143260 }, { "epoch": 2.5153180357801226, "grad_norm": 0.04412149621324071, "learning_rate": 0.00010320235729880929, "loss": 0.843, "step": 143270 }, { "epoch": 2.515493600660124, "grad_norm": 0.05260416439622253, "learning_rate": 0.00010319121640999269, "loss": 0.8413, "step": 143280 }, { "epoch": 2.5156691655401255, "grad_norm": 0.04531566356763744, "learning_rate": 0.00010318007549341295, "loss": 0.8536, "step": 143290 }, { "epoch": 2.5158447304201266, "grad_norm": 0.05063641109357206, "learning_rate": 0.00010316893454921126, "loss": 0.8505, "step": 143300 }, { "epoch": 2.516020295300128, "grad_norm": 0.06587380659479843, "learning_rate": 0.00010315779357752877, "loss": 0.8394, "step": 143310 }, { "epoch": 2.5161958601801295, "grad_norm": 0.04969717168841306, "learning_rate": 0.00010314665257850659, "loss": 0.848, "step": 143320 }, { "epoch": 2.516371425060131, "grad_norm": 0.06130554251609734, "learning_rate": 0.00010313551155228593, "loss": 0.8475, "step": 143330 }, { "epoch": 2.5165469899401325, "grad_norm": 0.059024783862386354, "learning_rate": 0.00010312437049900792, "loss": 0.8404, "step": 143340 }, { "epoch": 2.5167225548201335, "grad_norm": 0.05513863385961283, "learning_rate": 0.00010311322941881371, "loss": 0.8329, "step": 143350 }, { "epoch": 2.5168981197001354, "grad_norm": 0.0771009684609572, "learning_rate": 0.00010310208831184451, "loss": 0.8507, "step": 143360 }, { "epoch": 2.5170736845801365, "grad_norm": 0.07427337771390935, "learning_rate": 0.00010309094717824144, "loss": 0.847, "step": 143370 }, { "epoch": 2.517249249460138, "grad_norm": 0.047207835039594415, "learning_rate": 0.00010307980601814567, "loss": 0.8481, "step": 143380 }, { "epoch": 2.5174248143401394, "grad_norm": 0.05030302750316479, "learning_rate": 0.00010306866483169837, "loss": 0.8361, "step": 143390 }, { "epoch": 2.517600379220141, "grad_norm": 0.04286832227402328, "learning_rate": 0.00010305752361904064, "loss": 0.8352, "step": 143400 }, { "epoch": 2.5177759441001424, "grad_norm": 0.06418985086182974, "learning_rate": 0.00010304638238031375, "loss": 0.8468, "step": 143410 }, { "epoch": 2.5179515089801434, "grad_norm": 0.05056812838781507, "learning_rate": 0.00010303524111565876, "loss": 0.8455, "step": 143420 }, { "epoch": 2.518127073860145, "grad_norm": 0.0586320259523311, "learning_rate": 0.00010302409982521688, "loss": 0.8394, "step": 143430 }, { "epoch": 2.5183026387401464, "grad_norm": 0.055120060448774134, "learning_rate": 0.00010301295850912931, "loss": 0.8427, "step": 143440 }, { "epoch": 2.518478203620148, "grad_norm": 0.048793231668800924, "learning_rate": 0.00010300181716753711, "loss": 0.8418, "step": 143450 }, { "epoch": 2.5186537685001493, "grad_norm": 0.04439354475546193, "learning_rate": 0.00010299067580058153, "loss": 0.8398, "step": 143460 }, { "epoch": 2.5188293333801504, "grad_norm": 0.07418661259855411, "learning_rate": 0.00010297953440840371, "loss": 0.8438, "step": 143470 }, { "epoch": 2.5190048982601523, "grad_norm": 0.05646826331049359, "learning_rate": 0.00010296839299114483, "loss": 0.8514, "step": 143480 }, { "epoch": 2.5191804631401533, "grad_norm": 0.04712102428632186, "learning_rate": 0.000102957251548946, "loss": 0.8407, "step": 143490 }, { "epoch": 2.519356028020155, "grad_norm": 0.07152034074391402, "learning_rate": 0.00010294611008194843, "loss": 0.8387, "step": 143500 }, { "epoch": 2.5195315929001563, "grad_norm": 0.07374617627746383, "learning_rate": 0.00010293496859029329, "loss": 0.8417, "step": 143510 }, { "epoch": 2.5197071577801577, "grad_norm": 0.04223256039630638, "learning_rate": 0.00010292382707412169, "loss": 0.8461, "step": 143520 }, { "epoch": 2.5198827226601592, "grad_norm": 0.04689582242625291, "learning_rate": 0.00010291268553357487, "loss": 0.8395, "step": 143530 }, { "epoch": 2.5200582875401603, "grad_norm": 0.05075645851690045, "learning_rate": 0.00010290154396879393, "loss": 0.8305, "step": 143540 }, { "epoch": 2.5202338524201617, "grad_norm": 0.06366515165136206, "learning_rate": 0.00010289040237992007, "loss": 0.8454, "step": 143550 }, { "epoch": 2.520409417300163, "grad_norm": 0.058699963349016344, "learning_rate": 0.00010287926076709447, "loss": 0.8413, "step": 143560 }, { "epoch": 2.5205849821801647, "grad_norm": 0.05038195763213132, "learning_rate": 0.00010286811913045822, "loss": 0.8436, "step": 143570 }, { "epoch": 2.520760547060166, "grad_norm": 0.08045375640577782, "learning_rate": 0.00010285697747015262, "loss": 0.8484, "step": 143580 }, { "epoch": 2.5209361119401676, "grad_norm": 0.051158523151698096, "learning_rate": 0.0001028458357863187, "loss": 0.8425, "step": 143590 }, { "epoch": 2.521111676820169, "grad_norm": 0.05618380014742749, "learning_rate": 0.00010283469407909769, "loss": 0.8384, "step": 143600 }, { "epoch": 2.52128724170017, "grad_norm": 0.055280703522958774, "learning_rate": 0.00010282355234863081, "loss": 0.838, "step": 143610 }, { "epoch": 2.5214628065801716, "grad_norm": 0.056053489934573854, "learning_rate": 0.00010281241059505911, "loss": 0.8374, "step": 143620 }, { "epoch": 2.521638371460173, "grad_norm": 0.047892457124697946, "learning_rate": 0.00010280126881852383, "loss": 0.8422, "step": 143630 }, { "epoch": 2.5218139363401746, "grad_norm": 0.06246235642706999, "learning_rate": 0.00010279012701916615, "loss": 0.8462, "step": 143640 }, { "epoch": 2.521989501220176, "grad_norm": 0.059865640418197565, "learning_rate": 0.0001027789851971272, "loss": 0.8519, "step": 143650 }, { "epoch": 2.522165066100177, "grad_norm": 0.04524054412480435, "learning_rate": 0.00010276784335254821, "loss": 0.8452, "step": 143660 }, { "epoch": 2.5223406309801786, "grad_norm": 0.05014890722281261, "learning_rate": 0.00010275670148557026, "loss": 0.8492, "step": 143670 }, { "epoch": 2.52251619586018, "grad_norm": 0.06285568492259515, "learning_rate": 0.00010274555959633458, "loss": 0.8384, "step": 143680 }, { "epoch": 2.5226917607401815, "grad_norm": 0.060456780860240804, "learning_rate": 0.00010273441768498231, "loss": 0.8503, "step": 143690 }, { "epoch": 2.522867325620183, "grad_norm": 0.047242234917190734, "learning_rate": 0.00010272327575165466, "loss": 0.8461, "step": 143700 }, { "epoch": 2.5230428905001845, "grad_norm": 0.04502981961272794, "learning_rate": 0.00010271213379649275, "loss": 0.8406, "step": 143710 }, { "epoch": 2.523218455380186, "grad_norm": 0.049124791540028086, "learning_rate": 0.00010270099181963781, "loss": 0.8387, "step": 143720 }, { "epoch": 2.523394020260187, "grad_norm": 0.050553661365431955, "learning_rate": 0.00010268984982123098, "loss": 0.8479, "step": 143730 }, { "epoch": 2.5235695851401885, "grad_norm": 0.055505030502217736, "learning_rate": 0.00010267870780141338, "loss": 0.8352, "step": 143740 }, { "epoch": 2.52374515002019, "grad_norm": 0.05633584035815997, "learning_rate": 0.00010266756576032628, "loss": 0.8379, "step": 143750 }, { "epoch": 2.5239207149001914, "grad_norm": 0.05404121847433988, "learning_rate": 0.00010265642369811078, "loss": 0.8331, "step": 143760 }, { "epoch": 2.524096279780193, "grad_norm": 0.06611752726945576, "learning_rate": 0.00010264528161490805, "loss": 0.8418, "step": 143770 }, { "epoch": 2.524271844660194, "grad_norm": 0.04590745348889938, "learning_rate": 0.00010263413951085932, "loss": 0.8412, "step": 143780 }, { "epoch": 2.5244474095401954, "grad_norm": 0.05572260867829112, "learning_rate": 0.0001026229973861057, "loss": 0.8419, "step": 143790 }, { "epoch": 2.524622974420197, "grad_norm": 0.07500880332763435, "learning_rate": 0.00010261185524078842, "loss": 0.8444, "step": 143800 }, { "epoch": 2.5247985393001984, "grad_norm": 0.054865828010991345, "learning_rate": 0.00010260071307504862, "loss": 0.8424, "step": 143810 }, { "epoch": 2.5249741041802, "grad_norm": 0.052831401316494775, "learning_rate": 0.00010258957088902747, "loss": 0.8404, "step": 143820 }, { "epoch": 2.5251496690602013, "grad_norm": 0.05013736606394887, "learning_rate": 0.0001025784286828662, "loss": 0.8418, "step": 143830 }, { "epoch": 2.525325233940203, "grad_norm": 0.04933393561334326, "learning_rate": 0.00010256728645670586, "loss": 0.8383, "step": 143840 }, { "epoch": 2.525500798820204, "grad_norm": 0.08843762484670437, "learning_rate": 0.00010255614421068776, "loss": 0.8478, "step": 143850 }, { "epoch": 2.5256763637002053, "grad_norm": 0.04230827413399932, "learning_rate": 0.00010254500194495298, "loss": 0.842, "step": 143860 }, { "epoch": 2.525851928580207, "grad_norm": 0.052849253128135515, "learning_rate": 0.00010253385965964275, "loss": 0.8462, "step": 143870 }, { "epoch": 2.5260274934602083, "grad_norm": 0.06650163105911105, "learning_rate": 0.00010252271735489823, "loss": 0.8466, "step": 143880 }, { "epoch": 2.5262030583402098, "grad_norm": 0.05522858290699449, "learning_rate": 0.00010251157503086056, "loss": 0.8368, "step": 143890 }, { "epoch": 2.526378623220211, "grad_norm": 0.05281469347519334, "learning_rate": 0.00010250043268767096, "loss": 0.8427, "step": 143900 }, { "epoch": 2.5265541881002127, "grad_norm": 0.04011224673159607, "learning_rate": 0.00010248929032547056, "loss": 0.8381, "step": 143910 }, { "epoch": 2.5267297529802137, "grad_norm": 0.05999450994954265, "learning_rate": 0.0001024781479444006, "loss": 0.8506, "step": 143920 }, { "epoch": 2.526905317860215, "grad_norm": 0.04188012957177769, "learning_rate": 0.00010246700554460225, "loss": 0.8512, "step": 143930 }, { "epoch": 2.5270808827402167, "grad_norm": 0.06000569626031392, "learning_rate": 0.0001024558631262166, "loss": 0.8412, "step": 143940 }, { "epoch": 2.527256447620218, "grad_norm": 0.048639245334721685, "learning_rate": 0.00010244472068938493, "loss": 0.8405, "step": 143950 }, { "epoch": 2.5274320125002196, "grad_norm": 0.046443631883778716, "learning_rate": 0.00010243357823424832, "loss": 0.8507, "step": 143960 }, { "epoch": 2.5276075773802207, "grad_norm": 0.04861461288076238, "learning_rate": 0.00010242243576094803, "loss": 0.8421, "step": 143970 }, { "epoch": 2.527783142260222, "grad_norm": 0.047273022154055065, "learning_rate": 0.00010241129326962522, "loss": 0.8474, "step": 143980 }, { "epoch": 2.5279587071402236, "grad_norm": 0.047189964789937706, "learning_rate": 0.00010240015076042104, "loss": 0.8528, "step": 143990 }, { "epoch": 2.528134272020225, "grad_norm": 0.0558005030110785, "learning_rate": 0.0001023890082334767, "loss": 0.8421, "step": 144000 }, { "epoch": 2.5283098369002266, "grad_norm": 0.04880202261107336, "learning_rate": 0.00010237786568893331, "loss": 0.8426, "step": 144010 }, { "epoch": 2.5284854017802276, "grad_norm": 0.04943245530534555, "learning_rate": 0.00010236672312693214, "loss": 0.8431, "step": 144020 }, { "epoch": 2.5286609666602295, "grad_norm": 0.053766068110709934, "learning_rate": 0.00010235558054761431, "loss": 0.841, "step": 144030 }, { "epoch": 2.5288365315402306, "grad_norm": 0.05953189811720162, "learning_rate": 0.00010234443795112106, "loss": 0.8443, "step": 144040 }, { "epoch": 2.529012096420232, "grad_norm": 0.06341358409027839, "learning_rate": 0.00010233329533759346, "loss": 0.8426, "step": 144050 }, { "epoch": 2.5291876613002335, "grad_norm": 0.05503407426221624, "learning_rate": 0.00010232215270717278, "loss": 0.8469, "step": 144060 }, { "epoch": 2.529363226180235, "grad_norm": 0.04594711287537838, "learning_rate": 0.00010231101006000017, "loss": 0.8415, "step": 144070 }, { "epoch": 2.5295387910602365, "grad_norm": 0.04537270047287433, "learning_rate": 0.0001022998673962168, "loss": 0.8444, "step": 144080 }, { "epoch": 2.5297143559402375, "grad_norm": 0.06079361127125157, "learning_rate": 0.0001022887247159639, "loss": 0.8464, "step": 144090 }, { "epoch": 2.529889920820239, "grad_norm": 0.053222530632800365, "learning_rate": 0.00010227758201938256, "loss": 0.8413, "step": 144100 }, { "epoch": 2.5300654857002405, "grad_norm": 0.047679746815606834, "learning_rate": 0.00010226643930661404, "loss": 0.8465, "step": 144110 }, { "epoch": 2.530241050580242, "grad_norm": 0.05505141574667855, "learning_rate": 0.00010225529657779946, "loss": 0.8433, "step": 144120 }, { "epoch": 2.5304166154602434, "grad_norm": 0.06708201657668068, "learning_rate": 0.00010224415383308008, "loss": 0.8418, "step": 144130 }, { "epoch": 2.5305921803402445, "grad_norm": 0.06965235478992829, "learning_rate": 0.00010223301107259699, "loss": 0.8376, "step": 144140 }, { "epoch": 2.5307677452202464, "grad_norm": 0.044920926809689676, "learning_rate": 0.00010222186829649143, "loss": 0.8357, "step": 144150 }, { "epoch": 2.5309433101002474, "grad_norm": 0.06288929959794745, "learning_rate": 0.00010221072550490456, "loss": 0.8481, "step": 144160 }, { "epoch": 2.531118874980249, "grad_norm": 0.05597092464521089, "learning_rate": 0.0001021995826979776, "loss": 0.8453, "step": 144170 }, { "epoch": 2.5312944398602504, "grad_norm": 0.06173046836415393, "learning_rate": 0.00010218843987585165, "loss": 0.837, "step": 144180 }, { "epoch": 2.531470004740252, "grad_norm": 0.06481432620981036, "learning_rate": 0.00010217729703866796, "loss": 0.8442, "step": 144190 }, { "epoch": 2.5316455696202533, "grad_norm": 0.059688425957187735, "learning_rate": 0.0001021661541865677, "loss": 0.8406, "step": 144200 }, { "epoch": 2.5318211345002544, "grad_norm": 0.07596246309292436, "learning_rate": 0.00010215501131969202, "loss": 0.8329, "step": 144210 }, { "epoch": 2.531996699380256, "grad_norm": 0.042385445910712666, "learning_rate": 0.00010214386843818215, "loss": 0.8397, "step": 144220 }, { "epoch": 2.5321722642602573, "grad_norm": 0.06918010427965082, "learning_rate": 0.00010213272554217922, "loss": 0.8458, "step": 144230 }, { "epoch": 2.532347829140259, "grad_norm": 0.052288260099418685, "learning_rate": 0.00010212158263182448, "loss": 0.8461, "step": 144240 }, { "epoch": 2.5325233940202603, "grad_norm": 0.047955869550531806, "learning_rate": 0.00010211043970725904, "loss": 0.8431, "step": 144250 }, { "epoch": 2.5326989589002618, "grad_norm": 0.05594957274267593, "learning_rate": 0.00010209929676862413, "loss": 0.8445, "step": 144260 }, { "epoch": 2.5328745237802632, "grad_norm": 0.046785056958387444, "learning_rate": 0.00010208815381606091, "loss": 0.849, "step": 144270 }, { "epoch": 2.5330500886602643, "grad_norm": 0.053502871926243784, "learning_rate": 0.00010207701084971058, "loss": 0.8462, "step": 144280 }, { "epoch": 2.5332256535402657, "grad_norm": 0.06364130437395492, "learning_rate": 0.00010206586786971433, "loss": 0.8363, "step": 144290 }, { "epoch": 2.533401218420267, "grad_norm": 0.05683247775459558, "learning_rate": 0.00010205472487621328, "loss": 0.8423, "step": 144300 }, { "epoch": 2.5335767833002687, "grad_norm": 0.0519159611717314, "learning_rate": 0.00010204358186934868, "loss": 0.8419, "step": 144310 }, { "epoch": 2.53375234818027, "grad_norm": 0.07093728518189547, "learning_rate": 0.0001020324388492617, "loss": 0.841, "step": 144320 }, { "epoch": 2.533927913060271, "grad_norm": 0.0685843281271463, "learning_rate": 0.00010202129581609359, "loss": 0.8351, "step": 144330 }, { "epoch": 2.5341034779402727, "grad_norm": 0.05906505409707912, "learning_rate": 0.0001020101527699854, "loss": 0.8358, "step": 144340 }, { "epoch": 2.534279042820274, "grad_norm": 0.048824994034561246, "learning_rate": 0.00010199900971107839, "loss": 0.8492, "step": 144350 }, { "epoch": 2.5344546077002756, "grad_norm": 0.05821740554587821, "learning_rate": 0.00010198786663951374, "loss": 0.8491, "step": 144360 }, { "epoch": 2.534630172580277, "grad_norm": 0.05377262560185397, "learning_rate": 0.00010197672355543262, "loss": 0.8453, "step": 144370 }, { "epoch": 2.5348057374602786, "grad_norm": 0.05582314628441169, "learning_rate": 0.00010196558045897622, "loss": 0.8462, "step": 144380 }, { "epoch": 2.53498130234028, "grad_norm": 0.049937731389901895, "learning_rate": 0.00010195443735028575, "loss": 0.8513, "step": 144390 }, { "epoch": 2.535156867220281, "grad_norm": 0.046747617082913966, "learning_rate": 0.00010194329422950237, "loss": 0.8448, "step": 144400 }, { "epoch": 2.5353324321002826, "grad_norm": 0.045071129301062066, "learning_rate": 0.00010193215109676729, "loss": 0.8432, "step": 144410 }, { "epoch": 2.535507996980284, "grad_norm": 0.061590295420515175, "learning_rate": 0.00010192100795222163, "loss": 0.8419, "step": 144420 }, { "epoch": 2.5356835618602855, "grad_norm": 0.054864349427898924, "learning_rate": 0.00010190986479600666, "loss": 0.8468, "step": 144430 }, { "epoch": 2.535859126740287, "grad_norm": 0.04997988179934848, "learning_rate": 0.00010189872162826353, "loss": 0.8502, "step": 144440 }, { "epoch": 2.536034691620288, "grad_norm": 0.06149772076252384, "learning_rate": 0.00010188757844913341, "loss": 0.8435, "step": 144450 }, { "epoch": 2.5362102565002895, "grad_norm": 0.08083725828211547, "learning_rate": 0.00010187643525875751, "loss": 0.852, "step": 144460 }, { "epoch": 2.536385821380291, "grad_norm": 0.06488596642745485, "learning_rate": 0.00010186529205727699, "loss": 0.8443, "step": 144470 }, { "epoch": 2.5365613862602925, "grad_norm": 0.05996403690704777, "learning_rate": 0.00010185414884483304, "loss": 0.8456, "step": 144480 }, { "epoch": 2.536736951140294, "grad_norm": 0.05914132578562475, "learning_rate": 0.00010184300562156693, "loss": 0.849, "step": 144490 }, { "epoch": 2.5369125160202954, "grad_norm": 0.05471811567553182, "learning_rate": 0.00010183186238761972, "loss": 0.8475, "step": 144500 }, { "epoch": 2.537088080900297, "grad_norm": 0.06303871161348656, "learning_rate": 0.0001018207191431327, "loss": 0.844, "step": 144510 }, { "epoch": 2.537263645780298, "grad_norm": 0.058058800540748065, "learning_rate": 0.00010180957588824693, "loss": 0.844, "step": 144520 }, { "epoch": 2.5374392106602994, "grad_norm": 0.04793444768644256, "learning_rate": 0.00010179843262310375, "loss": 0.8468, "step": 144530 }, { "epoch": 2.537614775540301, "grad_norm": 0.05785365742343034, "learning_rate": 0.00010178728934784425, "loss": 0.8422, "step": 144540 }, { "epoch": 2.5377903404203024, "grad_norm": 0.05845609424828943, "learning_rate": 0.00010177614606260966, "loss": 0.8473, "step": 144550 }, { "epoch": 2.537965905300304, "grad_norm": 0.06079905654199603, "learning_rate": 0.00010176500276754115, "loss": 0.8373, "step": 144560 }, { "epoch": 2.538141470180305, "grad_norm": 0.06554006279206685, "learning_rate": 0.00010175385946277986, "loss": 0.8396, "step": 144570 }, { "epoch": 2.5383170350603064, "grad_norm": 0.05718782101564981, "learning_rate": 0.00010174271614846707, "loss": 0.8384, "step": 144580 }, { "epoch": 2.538492599940308, "grad_norm": 0.056149209630912134, "learning_rate": 0.00010173157282474393, "loss": 0.8442, "step": 144590 }, { "epoch": 2.5386681648203093, "grad_norm": 0.055232908549785846, "learning_rate": 0.00010172042949175161, "loss": 0.8419, "step": 144600 }, { "epoch": 2.538843729700311, "grad_norm": 0.061850051139595436, "learning_rate": 0.0001017092861496313, "loss": 0.845, "step": 144610 }, { "epoch": 2.5390192945803123, "grad_norm": 0.04867572995144214, "learning_rate": 0.00010169814279852421, "loss": 0.8477, "step": 144620 }, { "epoch": 2.5391948594603138, "grad_norm": 0.08973743334317331, "learning_rate": 0.00010168699943857152, "loss": 0.8471, "step": 144630 }, { "epoch": 2.539370424340315, "grad_norm": 0.06956218533716965, "learning_rate": 0.00010167585606991438, "loss": 0.8416, "step": 144640 }, { "epoch": 2.5395459892203163, "grad_norm": 0.0658479959181583, "learning_rate": 0.00010166471269269404, "loss": 0.8472, "step": 144650 }, { "epoch": 2.5397215541003177, "grad_norm": 0.058252494935241504, "learning_rate": 0.00010165356930705168, "loss": 0.8488, "step": 144660 }, { "epoch": 2.5398971189803192, "grad_norm": 0.07004803287577482, "learning_rate": 0.00010164242591312847, "loss": 0.8413, "step": 144670 }, { "epoch": 2.5400726838603207, "grad_norm": 0.055773906507143395, "learning_rate": 0.0001016312825110656, "loss": 0.8525, "step": 144680 }, { "epoch": 2.5402482487403217, "grad_norm": 0.056751392129289396, "learning_rate": 0.00010162013910100423, "loss": 0.8439, "step": 144690 }, { "epoch": 2.5404238136203237, "grad_norm": 0.0621924878510073, "learning_rate": 0.00010160899568308562, "loss": 0.8448, "step": 144700 }, { "epoch": 2.5405993785003247, "grad_norm": 0.04592756572800951, "learning_rate": 0.00010159785225745086, "loss": 0.8445, "step": 144710 }, { "epoch": 2.540774943380326, "grad_norm": 0.05489442169792734, "learning_rate": 0.00010158670882424125, "loss": 0.8431, "step": 144720 }, { "epoch": 2.5409505082603276, "grad_norm": 0.045538711917369794, "learning_rate": 0.00010157556538359792, "loss": 0.849, "step": 144730 }, { "epoch": 2.541126073140329, "grad_norm": 0.052661033324916014, "learning_rate": 0.00010156442193566205, "loss": 0.8377, "step": 144740 }, { "epoch": 2.5413016380203306, "grad_norm": 0.04705759945810261, "learning_rate": 0.00010155327848057486, "loss": 0.8527, "step": 144750 }, { "epoch": 2.5414772029003316, "grad_norm": 0.06587685768727086, "learning_rate": 0.00010154213501847749, "loss": 0.8516, "step": 144760 }, { "epoch": 2.541652767780333, "grad_norm": 0.04693078142782494, "learning_rate": 0.0001015309915495112, "loss": 0.8443, "step": 144770 }, { "epoch": 2.5418283326603346, "grad_norm": 0.0727219481334051, "learning_rate": 0.00010151984807381713, "loss": 0.8395, "step": 144780 }, { "epoch": 2.542003897540336, "grad_norm": 0.05949711413971895, "learning_rate": 0.0001015087045915365, "loss": 0.8443, "step": 144790 }, { "epoch": 2.5421794624203375, "grad_norm": 0.04297050088318855, "learning_rate": 0.00010149756110281048, "loss": 0.8421, "step": 144800 }, { "epoch": 2.5423550273003386, "grad_norm": 0.053398916101025436, "learning_rate": 0.00010148641760778026, "loss": 0.8426, "step": 144810 }, { "epoch": 2.5425305921803405, "grad_norm": 0.06893171259816591, "learning_rate": 0.00010147527410658704, "loss": 0.8391, "step": 144820 }, { "epoch": 2.5427061570603415, "grad_norm": 0.05259010420760769, "learning_rate": 0.00010146413059937199, "loss": 0.8417, "step": 144830 }, { "epoch": 2.542881721940343, "grad_norm": 0.04647543262511668, "learning_rate": 0.00010145298708627633, "loss": 0.8496, "step": 144840 }, { "epoch": 2.5430572868203445, "grad_norm": 0.06046248625407634, "learning_rate": 0.00010144184356744126, "loss": 0.8439, "step": 144850 }, { "epoch": 2.543232851700346, "grad_norm": 0.05221153291169053, "learning_rate": 0.00010143070004300792, "loss": 0.8425, "step": 144860 }, { "epoch": 2.5434084165803474, "grad_norm": 0.04515723361578262, "learning_rate": 0.00010141955651311752, "loss": 0.846, "step": 144870 }, { "epoch": 2.5435839814603485, "grad_norm": 0.07598957018794181, "learning_rate": 0.00010140841297791128, "loss": 0.8501, "step": 144880 }, { "epoch": 2.54375954634035, "grad_norm": 0.043723290304553304, "learning_rate": 0.00010139726943753036, "loss": 0.8481, "step": 144890 }, { "epoch": 2.5439351112203514, "grad_norm": 0.06921229023281936, "learning_rate": 0.00010138612589211598, "loss": 0.8484, "step": 144900 }, { "epoch": 2.544110676100353, "grad_norm": 0.04743897426046057, "learning_rate": 0.00010137498234180926, "loss": 0.8532, "step": 144910 }, { "epoch": 2.5442862409803544, "grad_norm": 0.04090631601278316, "learning_rate": 0.00010136383878675151, "loss": 0.8536, "step": 144920 }, { "epoch": 2.5444618058603554, "grad_norm": 0.06315272754525823, "learning_rate": 0.00010135269522708379, "loss": 0.8409, "step": 144930 }, { "epoch": 2.5446373707403573, "grad_norm": 0.05290937526904948, "learning_rate": 0.00010134155166294741, "loss": 0.842, "step": 144940 }, { "epoch": 2.5448129356203584, "grad_norm": 0.05194938221519553, "learning_rate": 0.00010133040809448349, "loss": 0.843, "step": 144950 }, { "epoch": 2.54498850050036, "grad_norm": 0.05176302302682112, "learning_rate": 0.00010131926452183319, "loss": 0.8386, "step": 144960 }, { "epoch": 2.5451640653803613, "grad_norm": 0.047515023886895866, "learning_rate": 0.0001013081209451378, "loss": 0.8417, "step": 144970 }, { "epoch": 2.545339630260363, "grad_norm": 0.0578206852867643, "learning_rate": 0.00010129697736453841, "loss": 0.8504, "step": 144980 }, { "epoch": 2.5455151951403643, "grad_norm": 0.05066047032591141, "learning_rate": 0.00010128583378017627, "loss": 0.8384, "step": 144990 }, { "epoch": 2.5456907600203653, "grad_norm": 0.0643579102414522, "learning_rate": 0.0001012746901921926, "loss": 0.8488, "step": 145000 }, { "epoch": 2.545866324900367, "grad_norm": 0.05057623385838652, "learning_rate": 0.00010126354660072854, "loss": 0.844, "step": 145010 }, { "epoch": 2.5460418897803683, "grad_norm": 0.060996196798083165, "learning_rate": 0.00010125240300592529, "loss": 0.842, "step": 145020 }, { "epoch": 2.5462174546603698, "grad_norm": 0.0657188275066196, "learning_rate": 0.00010124125940792404, "loss": 0.8478, "step": 145030 }, { "epoch": 2.5463930195403712, "grad_norm": 0.04714476592435689, "learning_rate": 0.00010123011580686599, "loss": 0.843, "step": 145040 }, { "epoch": 2.5465685844203727, "grad_norm": 0.07733053067239935, "learning_rate": 0.00010121897220289232, "loss": 0.8329, "step": 145050 }, { "epoch": 2.546744149300374, "grad_norm": 0.06416761378815437, "learning_rate": 0.00010120782859614422, "loss": 0.8482, "step": 145060 }, { "epoch": 2.546919714180375, "grad_norm": 0.04802477667600119, "learning_rate": 0.00010119668498676293, "loss": 0.8481, "step": 145070 }, { "epoch": 2.5470952790603767, "grad_norm": 0.06169772444817597, "learning_rate": 0.00010118554137488955, "loss": 0.8414, "step": 145080 }, { "epoch": 2.547270843940378, "grad_norm": 0.06337408088746518, "learning_rate": 0.00010117439776066539, "loss": 0.8281, "step": 145090 }, { "epoch": 2.5474464088203796, "grad_norm": 0.053185567360471864, "learning_rate": 0.00010116325414423153, "loss": 0.8422, "step": 145100 }, { "epoch": 2.547621973700381, "grad_norm": 0.04621873375386792, "learning_rate": 0.00010115211052572924, "loss": 0.8373, "step": 145110 }, { "epoch": 2.547797538580382, "grad_norm": 0.049372102716025955, "learning_rate": 0.00010114096690529968, "loss": 0.8427, "step": 145120 }, { "epoch": 2.5479731034603836, "grad_norm": 0.05278620942892563, "learning_rate": 0.000101129823283084, "loss": 0.8393, "step": 145130 }, { "epoch": 2.548148668340385, "grad_norm": 0.06623980307345328, "learning_rate": 0.00010111867965922346, "loss": 0.8388, "step": 145140 }, { "epoch": 2.5483242332203866, "grad_norm": 0.055319029118582996, "learning_rate": 0.0001011075360338592, "loss": 0.8379, "step": 145150 }, { "epoch": 2.548499798100388, "grad_norm": 0.04736109967925576, "learning_rate": 0.00010109639240713247, "loss": 0.8437, "step": 145160 }, { "epoch": 2.5486753629803895, "grad_norm": 0.04428945695276519, "learning_rate": 0.00010108524877918444, "loss": 0.8464, "step": 145170 }, { "epoch": 2.548850927860391, "grad_norm": 0.05136090746388668, "learning_rate": 0.00010107410515015627, "loss": 0.8418, "step": 145180 }, { "epoch": 2.549026492740392, "grad_norm": 0.054464202599446856, "learning_rate": 0.0001010629615201892, "loss": 0.8474, "step": 145190 }, { "epoch": 2.5492020576203935, "grad_norm": 0.05040663039267084, "learning_rate": 0.00010105181788942435, "loss": 0.8413, "step": 145200 }, { "epoch": 2.549377622500395, "grad_norm": 0.06452319515705326, "learning_rate": 0.00010104067425800298, "loss": 0.8394, "step": 145210 }, { "epoch": 2.5495531873803965, "grad_norm": 0.04912930771701553, "learning_rate": 0.00010102953062606626, "loss": 0.8497, "step": 145220 }, { "epoch": 2.549728752260398, "grad_norm": 0.052677713748579914, "learning_rate": 0.00010101838699375539, "loss": 0.8518, "step": 145230 }, { "epoch": 2.549904317140399, "grad_norm": 0.05156330544295093, "learning_rate": 0.00010100724336121157, "loss": 0.8416, "step": 145240 }, { "epoch": 2.5500798820204005, "grad_norm": 0.07347596393768058, "learning_rate": 0.00010099609972857595, "loss": 0.8468, "step": 145250 }, { "epoch": 2.550255446900402, "grad_norm": 0.07107528092463682, "learning_rate": 0.00010098495609598973, "loss": 0.8385, "step": 145260 }, { "epoch": 2.5504310117804034, "grad_norm": 0.10097798218705023, "learning_rate": 0.00010097381246359415, "loss": 0.8516, "step": 145270 }, { "epoch": 2.550606576660405, "grad_norm": 0.07881473901503924, "learning_rate": 0.00010096266883153036, "loss": 0.8415, "step": 145280 }, { "epoch": 2.5507821415404064, "grad_norm": 0.04919089705435014, "learning_rate": 0.00010095152519993953, "loss": 0.8477, "step": 145290 }, { "epoch": 2.550957706420408, "grad_norm": 0.046771163228420454, "learning_rate": 0.00010094038156896293, "loss": 0.8455, "step": 145300 }, { "epoch": 2.551133271300409, "grad_norm": 0.058449524156204524, "learning_rate": 0.00010092923793874172, "loss": 0.8347, "step": 145310 }, { "epoch": 2.5513088361804104, "grad_norm": 0.042984196668141204, "learning_rate": 0.00010091809430941706, "loss": 0.8484, "step": 145320 }, { "epoch": 2.551484401060412, "grad_norm": 0.054942757970958704, "learning_rate": 0.00010090695068113009, "loss": 0.8449, "step": 145330 }, { "epoch": 2.5516599659404133, "grad_norm": 0.057068652608588705, "learning_rate": 0.00010089580705402215, "loss": 0.8381, "step": 145340 }, { "epoch": 2.551835530820415, "grad_norm": 0.04752293066358601, "learning_rate": 0.00010088466342823439, "loss": 0.8405, "step": 145350 }, { "epoch": 2.552011095700416, "grad_norm": 0.05978421852128778, "learning_rate": 0.00010087351980390787, "loss": 0.8448, "step": 145360 }, { "epoch": 2.5521866605804178, "grad_norm": 0.0481399387560657, "learning_rate": 0.00010086237618118395, "loss": 0.8463, "step": 145370 }, { "epoch": 2.552362225460419, "grad_norm": 0.06554957336424094, "learning_rate": 0.00010085123256020372, "loss": 0.848, "step": 145380 }, { "epoch": 2.5525377903404203, "grad_norm": 0.059838806875171235, "learning_rate": 0.00010084008894110839, "loss": 0.8446, "step": 145390 }, { "epoch": 2.5527133552204218, "grad_norm": 0.0908204614349178, "learning_rate": 0.0001008289453240392, "loss": 0.8414, "step": 145400 }, { "epoch": 2.5528889201004232, "grad_norm": 0.0428732652389429, "learning_rate": 0.00010081780170913729, "loss": 0.8386, "step": 145410 }, { "epoch": 2.5530644849804247, "grad_norm": 0.047394351095253716, "learning_rate": 0.00010080665809654388, "loss": 0.8455, "step": 145420 }, { "epoch": 2.5532400498604257, "grad_norm": 0.04656822576067342, "learning_rate": 0.0001007955144864001, "loss": 0.8497, "step": 145430 }, { "epoch": 2.553415614740427, "grad_norm": 0.04957968522549764, "learning_rate": 0.00010078437087884722, "loss": 0.8397, "step": 145440 }, { "epoch": 2.5535911796204287, "grad_norm": 0.043468376605640444, "learning_rate": 0.00010077322727402643, "loss": 0.8505, "step": 145450 }, { "epoch": 2.55376674450043, "grad_norm": 0.07847624441052135, "learning_rate": 0.00010076208367207885, "loss": 0.8469, "step": 145460 }, { "epoch": 2.5539423093804317, "grad_norm": 0.051330513576354736, "learning_rate": 0.00010075094007314573, "loss": 0.8478, "step": 145470 }, { "epoch": 2.5541178742604327, "grad_norm": 0.059310758878030785, "learning_rate": 0.00010073979647736828, "loss": 0.8338, "step": 145480 }, { "epoch": 2.5542934391404346, "grad_norm": 0.05595662786267769, "learning_rate": 0.00010072865288488762, "loss": 0.8457, "step": 145490 }, { "epoch": 2.5544690040204356, "grad_norm": 0.07012873411959922, "learning_rate": 0.00010071750929584496, "loss": 0.8416, "step": 145500 }, { "epoch": 2.554644568900437, "grad_norm": 0.04342683563765623, "learning_rate": 0.00010070636571038157, "loss": 0.8485, "step": 145510 }, { "epoch": 2.5548201337804386, "grad_norm": 0.06252645124893601, "learning_rate": 0.00010069522212863857, "loss": 0.8479, "step": 145520 }, { "epoch": 2.55499569866044, "grad_norm": 0.04538783878507589, "learning_rate": 0.00010068407855075713, "loss": 0.8462, "step": 145530 }, { "epoch": 2.5551712635404416, "grad_norm": 0.07186351028806649, "learning_rate": 0.00010067293497687849, "loss": 0.8468, "step": 145540 }, { "epoch": 2.5553468284204426, "grad_norm": 0.057958288808702345, "learning_rate": 0.00010066179140714384, "loss": 0.8417, "step": 145550 }, { "epoch": 2.555522393300444, "grad_norm": 0.05493087336234765, "learning_rate": 0.00010065064784169433, "loss": 0.8431, "step": 145560 }, { "epoch": 2.5556979581804455, "grad_norm": 0.04912410926654572, "learning_rate": 0.00010063950428067121, "loss": 0.8441, "step": 145570 }, { "epoch": 2.555873523060447, "grad_norm": 0.08780644148743749, "learning_rate": 0.00010062836072421565, "loss": 0.8393, "step": 145580 }, { "epoch": 2.5560490879404485, "grad_norm": 0.05624489549246763, "learning_rate": 0.00010061721717246881, "loss": 0.8458, "step": 145590 }, { "epoch": 2.5562246528204495, "grad_norm": 0.056588942492452464, "learning_rate": 0.00010060607362557186, "loss": 0.8499, "step": 145600 }, { "epoch": 2.5564002177004514, "grad_norm": 0.06800144440336547, "learning_rate": 0.00010059493008366607, "loss": 0.8498, "step": 145610 }, { "epoch": 2.5565757825804525, "grad_norm": 0.05594840231280971, "learning_rate": 0.00010058378654689262, "loss": 0.8402, "step": 145620 }, { "epoch": 2.556751347460454, "grad_norm": 0.0499615620064173, "learning_rate": 0.00010057264301539264, "loss": 0.846, "step": 145630 }, { "epoch": 2.5569269123404554, "grad_norm": 0.07878863223548171, "learning_rate": 0.00010056149948930738, "loss": 0.8425, "step": 145640 }, { "epoch": 2.557102477220457, "grad_norm": 0.04836017826252143, "learning_rate": 0.00010055035596877798, "loss": 0.8474, "step": 145650 }, { "epoch": 2.5572780421004584, "grad_norm": 0.05077379369849083, "learning_rate": 0.00010053921245394569, "loss": 0.8456, "step": 145660 }, { "epoch": 2.5574536069804594, "grad_norm": 0.058326395586401736, "learning_rate": 0.00010052806894495159, "loss": 0.8522, "step": 145670 }, { "epoch": 2.557629171860461, "grad_norm": 0.08557586024156495, "learning_rate": 0.00010051692544193703, "loss": 0.8378, "step": 145680 }, { "epoch": 2.5578047367404624, "grad_norm": 0.0694625651725857, "learning_rate": 0.00010050578194504313, "loss": 0.8372, "step": 145690 }, { "epoch": 2.557980301620464, "grad_norm": 0.05246393789326766, "learning_rate": 0.00010049463845441097, "loss": 0.8411, "step": 145700 }, { "epoch": 2.5581558665004653, "grad_norm": 0.04911594044564899, "learning_rate": 0.0001004834949701819, "loss": 0.847, "step": 145710 }, { "epoch": 2.558331431380467, "grad_norm": 0.04639987024437049, "learning_rate": 0.00010047235149249705, "loss": 0.8425, "step": 145720 }, { "epoch": 2.5585069962604683, "grad_norm": 0.05198394782596791, "learning_rate": 0.00010046120802149758, "loss": 0.8473, "step": 145730 }, { "epoch": 2.5586825611404693, "grad_norm": 0.08651410570655242, "learning_rate": 0.00010045006455732473, "loss": 0.8519, "step": 145740 }, { "epoch": 2.558858126020471, "grad_norm": 0.0741389680905672, "learning_rate": 0.00010043892110011967, "loss": 0.8494, "step": 145750 }, { "epoch": 2.5590336909004723, "grad_norm": 0.06813705236142924, "learning_rate": 0.0001004277776500236, "loss": 0.8505, "step": 145760 }, { "epoch": 2.5592092557804738, "grad_norm": 0.05110025789740185, "learning_rate": 0.00010041663420717764, "loss": 0.8414, "step": 145770 }, { "epoch": 2.5593848206604752, "grad_norm": 0.04963426523117195, "learning_rate": 0.00010040549077172308, "loss": 0.8482, "step": 145780 }, { "epoch": 2.5595603855404763, "grad_norm": 0.050690998094462196, "learning_rate": 0.00010039434734380106, "loss": 0.8399, "step": 145790 }, { "epoch": 2.5597359504204777, "grad_norm": 0.05761201805093799, "learning_rate": 0.00010038320392355276, "loss": 0.8497, "step": 145800 }, { "epoch": 2.5599115153004792, "grad_norm": 0.04710625332333758, "learning_rate": 0.00010037206051111942, "loss": 0.8421, "step": 145810 }, { "epoch": 2.5600870801804807, "grad_norm": 0.04157857919789309, "learning_rate": 0.0001003609171066422, "loss": 0.8496, "step": 145820 }, { "epoch": 2.560262645060482, "grad_norm": 0.04011833263340493, "learning_rate": 0.00010034977371026224, "loss": 0.8489, "step": 145830 }, { "epoch": 2.5604382099404837, "grad_norm": 0.040614760461466594, "learning_rate": 0.00010033863032212077, "loss": 0.8472, "step": 145840 }, { "epoch": 2.560613774820485, "grad_norm": 0.06816586913378796, "learning_rate": 0.00010032748694235898, "loss": 0.8555, "step": 145850 }, { "epoch": 2.560789339700486, "grad_norm": 0.0435392572235769, "learning_rate": 0.00010031634357111812, "loss": 0.8423, "step": 145860 }, { "epoch": 2.5609649045804876, "grad_norm": 0.043835891591403754, "learning_rate": 0.00010030520020853924, "loss": 0.8517, "step": 145870 }, { "epoch": 2.561140469460489, "grad_norm": 0.05576117389087177, "learning_rate": 0.00010029405685476365, "loss": 0.8428, "step": 145880 }, { "epoch": 2.5613160343404906, "grad_norm": 0.04820290473246229, "learning_rate": 0.00010028291350993248, "loss": 0.8499, "step": 145890 }, { "epoch": 2.561491599220492, "grad_norm": 0.0689576723216291, "learning_rate": 0.00010027177017418695, "loss": 0.8474, "step": 145900 }, { "epoch": 2.561667164100493, "grad_norm": 0.040884522783742466, "learning_rate": 0.00010026062684766822, "loss": 0.8524, "step": 145910 }, { "epoch": 2.5618427289804946, "grad_norm": 0.058052004220503814, "learning_rate": 0.00010024948353051751, "loss": 0.8462, "step": 145920 }, { "epoch": 2.562018293860496, "grad_norm": 0.08634507108634117, "learning_rate": 0.00010023834022287597, "loss": 0.8458, "step": 145930 }, { "epoch": 2.5621938587404975, "grad_norm": 0.06060628719761223, "learning_rate": 0.00010022719692488475, "loss": 0.842, "step": 145940 }, { "epoch": 2.562369423620499, "grad_norm": 0.04711331136288181, "learning_rate": 0.00010021605363668519, "loss": 0.8458, "step": 145950 }, { "epoch": 2.5625449885005005, "grad_norm": 0.07205084655469775, "learning_rate": 0.00010020491035841832, "loss": 0.8442, "step": 145960 }, { "epoch": 2.562720553380502, "grad_norm": 0.053156267061501596, "learning_rate": 0.00010019376709022539, "loss": 0.8458, "step": 145970 }, { "epoch": 2.562896118260503, "grad_norm": 0.057277012810616236, "learning_rate": 0.00010018262383224762, "loss": 0.8452, "step": 145980 }, { "epoch": 2.5630716831405045, "grad_norm": 0.058252548250647115, "learning_rate": 0.00010017148058462616, "loss": 0.8349, "step": 145990 }, { "epoch": 2.563247248020506, "grad_norm": 0.048497540432814096, "learning_rate": 0.00010016033734750219, "loss": 0.8445, "step": 146000 }, { "epoch": 2.5634228129005074, "grad_norm": 0.05898967218380269, "learning_rate": 0.00010014919412101686, "loss": 0.8461, "step": 146010 }, { "epoch": 2.563598377780509, "grad_norm": 0.051075791943676646, "learning_rate": 0.00010013805090531146, "loss": 0.8505, "step": 146020 }, { "epoch": 2.56377394266051, "grad_norm": 0.06490550773787757, "learning_rate": 0.00010012690770052712, "loss": 0.8463, "step": 146030 }, { "epoch": 2.5639495075405114, "grad_norm": 0.07285058734529765, "learning_rate": 0.00010011576450680498, "loss": 0.8458, "step": 146040 }, { "epoch": 2.564125072420513, "grad_norm": 0.043682219164542437, "learning_rate": 0.00010010462132428631, "loss": 0.835, "step": 146050 }, { "epoch": 2.5643006373005144, "grad_norm": 0.05490195993816553, "learning_rate": 0.00010009347815311226, "loss": 0.8439, "step": 146060 }, { "epoch": 2.564476202180516, "grad_norm": 0.05672823267219942, "learning_rate": 0.00010008233499342401, "loss": 0.8442, "step": 146070 }, { "epoch": 2.5646517670605173, "grad_norm": 0.07290858444767635, "learning_rate": 0.00010007119184536275, "loss": 0.8387, "step": 146080 }, { "epoch": 2.564827331940519, "grad_norm": 0.044706142305627375, "learning_rate": 0.00010006004870906968, "loss": 0.8421, "step": 146090 }, { "epoch": 2.56500289682052, "grad_norm": 0.055519367290141716, "learning_rate": 0.00010004890558468596, "loss": 0.8451, "step": 146100 }, { "epoch": 2.5651784617005213, "grad_norm": 0.05209146995553215, "learning_rate": 0.00010003776247235274, "loss": 0.8518, "step": 146110 }, { "epoch": 2.565354026580523, "grad_norm": 0.06306903738286265, "learning_rate": 0.00010002661937221134, "loss": 0.8424, "step": 146120 }, { "epoch": 2.5655295914605243, "grad_norm": 0.05934446811937853, "learning_rate": 0.00010001547628440282, "loss": 0.8426, "step": 146130 }, { "epoch": 2.5657051563405258, "grad_norm": 0.06041377447219753, "learning_rate": 0.0001000043332090684, "loss": 0.8484, "step": 146140 }, { "epoch": 2.565880721220527, "grad_norm": 0.06262626094541089, "learning_rate": 9.999319014634928e-05, "loss": 0.8421, "step": 146150 }, { "epoch": 2.5660562861005287, "grad_norm": 0.06411044017580053, "learning_rate": 9.998204709638664e-05, "loss": 0.8363, "step": 146160 }, { "epoch": 2.5662318509805297, "grad_norm": 0.07691173482900454, "learning_rate": 9.997090405932165e-05, "loss": 0.8519, "step": 146170 }, { "epoch": 2.5664074158605312, "grad_norm": 0.0621556625099902, "learning_rate": 9.995976103529547e-05, "loss": 0.8449, "step": 146180 }, { "epoch": 2.5665829807405327, "grad_norm": 0.0572128418196605, "learning_rate": 9.994861802444937e-05, "loss": 0.8421, "step": 146190 }, { "epoch": 2.566758545620534, "grad_norm": 0.06843379169498615, "learning_rate": 9.993747502692448e-05, "loss": 0.8495, "step": 146200 }, { "epoch": 2.5669341105005357, "grad_norm": 0.057601706552095684, "learning_rate": 9.992633204286192e-05, "loss": 0.8451, "step": 146210 }, { "epoch": 2.5671096753805367, "grad_norm": 0.04885623182275867, "learning_rate": 9.9915189072403e-05, "loss": 0.8457, "step": 146220 }, { "epoch": 2.567285240260538, "grad_norm": 0.06002304203262183, "learning_rate": 9.990404611568883e-05, "loss": 0.8452, "step": 146230 }, { "epoch": 2.5674608051405396, "grad_norm": 0.04814948853304108, "learning_rate": 9.989290317286058e-05, "loss": 0.8445, "step": 146240 }, { "epoch": 2.567636370020541, "grad_norm": 0.048274489983868005, "learning_rate": 9.988176024405949e-05, "loss": 0.8466, "step": 146250 }, { "epoch": 2.5678119349005426, "grad_norm": 0.06738956276566968, "learning_rate": 9.987061732942672e-05, "loss": 0.8464, "step": 146260 }, { "epoch": 2.5679874997805436, "grad_norm": 0.04433818650317746, "learning_rate": 9.985947442910343e-05, "loss": 0.8473, "step": 146270 }, { "epoch": 2.5681630646605456, "grad_norm": 0.05861951733924947, "learning_rate": 9.984833154323077e-05, "loss": 0.8446, "step": 146280 }, { "epoch": 2.5683386295405466, "grad_norm": 0.06823972124733521, "learning_rate": 9.983718867195004e-05, "loss": 0.8516, "step": 146290 }, { "epoch": 2.568514194420548, "grad_norm": 0.09128356034849289, "learning_rate": 9.982604581540232e-05, "loss": 0.843, "step": 146300 }, { "epoch": 2.5686897593005495, "grad_norm": 0.04691055569511221, "learning_rate": 9.981490297372883e-05, "loss": 0.8413, "step": 146310 }, { "epoch": 2.568865324180551, "grad_norm": 0.045782214712630394, "learning_rate": 9.980376014707074e-05, "loss": 0.8535, "step": 146320 }, { "epoch": 2.5690408890605525, "grad_norm": 0.04617833688510015, "learning_rate": 9.979261733556926e-05, "loss": 0.8433, "step": 146330 }, { "epoch": 2.5692164539405535, "grad_norm": 0.054647884435286, "learning_rate": 9.978147453936555e-05, "loss": 0.8429, "step": 146340 }, { "epoch": 2.569392018820555, "grad_norm": 0.04570658946414179, "learning_rate": 9.977033175860075e-05, "loss": 0.8404, "step": 146350 }, { "epoch": 2.5695675837005565, "grad_norm": 0.052241941521705124, "learning_rate": 9.975918899341611e-05, "loss": 0.8344, "step": 146360 }, { "epoch": 2.569743148580558, "grad_norm": 0.050142087843227934, "learning_rate": 9.974804624395281e-05, "loss": 0.8408, "step": 146370 }, { "epoch": 2.5699187134605594, "grad_norm": 0.06420525371674392, "learning_rate": 9.973690351035193e-05, "loss": 0.8461, "step": 146380 }, { "epoch": 2.5700942783405605, "grad_norm": 0.053281292810661435, "learning_rate": 9.972576079275479e-05, "loss": 0.8457, "step": 146390 }, { "epoch": 2.5702698432205624, "grad_norm": 0.056630073054151936, "learning_rate": 9.971461809130249e-05, "loss": 0.8449, "step": 146400 }, { "epoch": 2.5704454081005634, "grad_norm": 0.04935767124659955, "learning_rate": 9.97034754061362e-05, "loss": 0.852, "step": 146410 }, { "epoch": 2.570620972980565, "grad_norm": 0.04004155627914177, "learning_rate": 9.969233273739716e-05, "loss": 0.8421, "step": 146420 }, { "epoch": 2.5707965378605664, "grad_norm": 0.04995995451069953, "learning_rate": 9.968119008522651e-05, "loss": 0.8488, "step": 146430 }, { "epoch": 2.570972102740568, "grad_norm": 0.05175085594660993, "learning_rate": 9.967004744976543e-05, "loss": 0.8485, "step": 146440 }, { "epoch": 2.5711476676205693, "grad_norm": 0.05503301134090389, "learning_rate": 9.965890483115507e-05, "loss": 0.8435, "step": 146450 }, { "epoch": 2.5713232325005704, "grad_norm": 0.05187030000384985, "learning_rate": 9.964776222953668e-05, "loss": 0.8476, "step": 146460 }, { "epoch": 2.571498797380572, "grad_norm": 0.07195463168495114, "learning_rate": 9.96366196450514e-05, "loss": 0.849, "step": 146470 }, { "epoch": 2.5716743622605733, "grad_norm": 0.058179058236588796, "learning_rate": 9.962547707784039e-05, "loss": 0.844, "step": 146480 }, { "epoch": 2.571849927140575, "grad_norm": 0.058948297848585025, "learning_rate": 9.961433452804486e-05, "loss": 0.8453, "step": 146490 }, { "epoch": 2.5720254920205763, "grad_norm": 0.051401258154649104, "learning_rate": 9.9603191995806e-05, "loss": 0.8398, "step": 146500 }, { "epoch": 2.5722010569005778, "grad_norm": 0.049725620164639564, "learning_rate": 9.959204948126494e-05, "loss": 0.8463, "step": 146510 }, { "epoch": 2.5723766217805792, "grad_norm": 0.05110050481661468, "learning_rate": 9.958090698456286e-05, "loss": 0.8404, "step": 146520 }, { "epoch": 2.5725521866605803, "grad_norm": 0.054169003711872034, "learning_rate": 9.9569764505841e-05, "loss": 0.843, "step": 146530 }, { "epoch": 2.5727277515405818, "grad_norm": 0.07868375675623045, "learning_rate": 9.955862204524051e-05, "loss": 0.8455, "step": 146540 }, { "epoch": 2.5729033164205832, "grad_norm": 0.051763139387078834, "learning_rate": 9.95474796029025e-05, "loss": 0.8494, "step": 146550 }, { "epoch": 2.5730788813005847, "grad_norm": 0.08367336332910742, "learning_rate": 9.953633717896825e-05, "loss": 0.8442, "step": 146560 }, { "epoch": 2.573254446180586, "grad_norm": 0.0486173153768386, "learning_rate": 9.952519477357886e-05, "loss": 0.8514, "step": 146570 }, { "epoch": 2.573430011060587, "grad_norm": 0.06418235208669962, "learning_rate": 9.951405238687554e-05, "loss": 0.8488, "step": 146580 }, { "epoch": 2.5736055759405887, "grad_norm": 0.06468097487080823, "learning_rate": 9.950291001899947e-05, "loss": 0.8472, "step": 146590 }, { "epoch": 2.57378114082059, "grad_norm": 0.05406673332022471, "learning_rate": 9.949176767009184e-05, "loss": 0.8465, "step": 146600 }, { "epoch": 2.5739567057005917, "grad_norm": 0.0784857714825616, "learning_rate": 9.948062534029378e-05, "loss": 0.8418, "step": 146610 }, { "epoch": 2.574132270580593, "grad_norm": 0.060074209029519966, "learning_rate": 9.946948302974644e-05, "loss": 0.8512, "step": 146620 }, { "epoch": 2.5743078354605946, "grad_norm": 0.05852566639122022, "learning_rate": 9.94583407385911e-05, "loss": 0.8508, "step": 146630 }, { "epoch": 2.574483400340596, "grad_norm": 0.060288112310218626, "learning_rate": 9.944719846696888e-05, "loss": 0.8483, "step": 146640 }, { "epoch": 2.574658965220597, "grad_norm": 0.05522370146116506, "learning_rate": 9.943605621502092e-05, "loss": 0.8484, "step": 146650 }, { "epoch": 2.5748345301005986, "grad_norm": 0.05605921153263445, "learning_rate": 9.942491398288845e-05, "loss": 0.8505, "step": 146660 }, { "epoch": 2.5750100949806, "grad_norm": 0.049999839028364156, "learning_rate": 9.941377177071264e-05, "loss": 0.8396, "step": 146670 }, { "epoch": 2.5751856598606016, "grad_norm": 0.06368342068855877, "learning_rate": 9.940262957863464e-05, "loss": 0.8512, "step": 146680 }, { "epoch": 2.575361224740603, "grad_norm": 0.04605098255555727, "learning_rate": 9.939148740679557e-05, "loss": 0.847, "step": 146690 }, { "epoch": 2.575536789620604, "grad_norm": 0.05755265984221994, "learning_rate": 9.93803452553367e-05, "loss": 0.846, "step": 146700 }, { "epoch": 2.5757123545006055, "grad_norm": 0.0501450403001465, "learning_rate": 9.936920312439922e-05, "loss": 0.8441, "step": 146710 }, { "epoch": 2.575887919380607, "grad_norm": 0.07431845130723996, "learning_rate": 9.935806101412417e-05, "loss": 0.8452, "step": 146720 }, { "epoch": 2.5760634842606085, "grad_norm": 0.053549406383956485, "learning_rate": 9.934691892465284e-05, "loss": 0.8453, "step": 146730 }, { "epoch": 2.57623904914061, "grad_norm": 0.04132751255295063, "learning_rate": 9.933577685612635e-05, "loss": 0.8431, "step": 146740 }, { "epoch": 2.5764146140206114, "grad_norm": 0.05488265057348745, "learning_rate": 9.93246348086859e-05, "loss": 0.839, "step": 146750 }, { "epoch": 2.576590178900613, "grad_norm": 0.05501389852167699, "learning_rate": 9.931349278247264e-05, "loss": 0.8495, "step": 146760 }, { "epoch": 2.576765743780614, "grad_norm": 0.044275427210355114, "learning_rate": 9.930235077762776e-05, "loss": 0.8469, "step": 146770 }, { "epoch": 2.5769413086606154, "grad_norm": 0.04419997773267886, "learning_rate": 9.929120879429241e-05, "loss": 0.8431, "step": 146780 }, { "epoch": 2.577116873540617, "grad_norm": 0.13742601762718246, "learning_rate": 9.928006683260774e-05, "loss": 0.8455, "step": 146790 }, { "epoch": 2.5772924384206184, "grad_norm": 0.07062651950502362, "learning_rate": 9.926892489271501e-05, "loss": 0.8451, "step": 146800 }, { "epoch": 2.57746800330062, "grad_norm": 0.07030458373503558, "learning_rate": 9.92577829747553e-05, "loss": 0.8417, "step": 146810 }, { "epoch": 2.577643568180621, "grad_norm": 0.04772812792552056, "learning_rate": 9.924664107886982e-05, "loss": 0.8503, "step": 146820 }, { "epoch": 2.577819133060623, "grad_norm": 0.06807692920424081, "learning_rate": 9.923549920519975e-05, "loss": 0.8515, "step": 146830 }, { "epoch": 2.577994697940624, "grad_norm": 0.047433431841640374, "learning_rate": 9.922435735388624e-05, "loss": 0.8466, "step": 146840 }, { "epoch": 2.5781702628206253, "grad_norm": 0.042899147158382445, "learning_rate": 9.921321552507046e-05, "loss": 0.8445, "step": 146850 }, { "epoch": 2.578345827700627, "grad_norm": 0.05178923122160359, "learning_rate": 9.920207371889354e-05, "loss": 0.8447, "step": 146860 }, { "epoch": 2.5785213925806283, "grad_norm": 0.06289465386682991, "learning_rate": 9.919093193549673e-05, "loss": 0.8452, "step": 146870 }, { "epoch": 2.5786969574606298, "grad_norm": 0.05687207959644087, "learning_rate": 9.917979017502119e-05, "loss": 0.8382, "step": 146880 }, { "epoch": 2.578872522340631, "grad_norm": 0.057020995779003276, "learning_rate": 9.916864843760798e-05, "loss": 0.8451, "step": 146890 }, { "epoch": 2.5790480872206323, "grad_norm": 0.055882517786806224, "learning_rate": 9.91575067233984e-05, "loss": 0.8483, "step": 146900 }, { "epoch": 2.5792236521006338, "grad_norm": 0.04074806932181728, "learning_rate": 9.914636503253357e-05, "loss": 0.842, "step": 146910 }, { "epoch": 2.5793992169806352, "grad_norm": 0.05098606334725768, "learning_rate": 9.913522336515461e-05, "loss": 0.8482, "step": 146920 }, { "epoch": 2.5795747818606367, "grad_norm": 0.05140615670859901, "learning_rate": 9.912408172140275e-05, "loss": 0.8467, "step": 146930 }, { "epoch": 2.5797503467406377, "grad_norm": 0.053680007212813644, "learning_rate": 9.911294010141914e-05, "loss": 0.8453, "step": 146940 }, { "epoch": 2.5799259116206397, "grad_norm": 0.057842901393784465, "learning_rate": 9.910179850534495e-05, "loss": 0.8392, "step": 146950 }, { "epoch": 2.5801014765006407, "grad_norm": 0.05418440999961439, "learning_rate": 9.909065693332127e-05, "loss": 0.853, "step": 146960 }, { "epoch": 2.580277041380642, "grad_norm": 0.04857181384640481, "learning_rate": 9.907951538548939e-05, "loss": 0.8382, "step": 146970 }, { "epoch": 2.5804526062606437, "grad_norm": 0.059653497572903946, "learning_rate": 9.906837386199043e-05, "loss": 0.8407, "step": 146980 }, { "epoch": 2.580628171140645, "grad_norm": 0.06510525268681691, "learning_rate": 9.905723236296551e-05, "loss": 0.8463, "step": 146990 }, { "epoch": 2.5808037360206466, "grad_norm": 0.05585266174190998, "learning_rate": 9.904609088855584e-05, "loss": 0.8497, "step": 147000 }, { "epoch": 2.5809793009006476, "grad_norm": 0.05450246813805095, "learning_rate": 9.90349494389026e-05, "loss": 0.8414, "step": 147010 }, { "epoch": 2.581154865780649, "grad_norm": 0.05930219351235824, "learning_rate": 9.902380801414689e-05, "loss": 0.8448, "step": 147020 }, { "epoch": 2.5813304306606506, "grad_norm": 0.07791015329286377, "learning_rate": 9.90126666144299e-05, "loss": 0.8436, "step": 147030 }, { "epoch": 2.581505995540652, "grad_norm": 0.05423156103069297, "learning_rate": 9.900152523989284e-05, "loss": 0.8487, "step": 147040 }, { "epoch": 2.5816815604206536, "grad_norm": 0.054649649782138265, "learning_rate": 9.899038389067686e-05, "loss": 0.8525, "step": 147050 }, { "epoch": 2.5818571253006546, "grad_norm": 0.04645690101660406, "learning_rate": 9.897924256692303e-05, "loss": 0.8479, "step": 147060 }, { "epoch": 2.5820326901806565, "grad_norm": 0.060930932662461655, "learning_rate": 9.896810126877262e-05, "loss": 0.8434, "step": 147070 }, { "epoch": 2.5822082550606575, "grad_norm": 0.06339822495109591, "learning_rate": 9.895695999636676e-05, "loss": 0.8382, "step": 147080 }, { "epoch": 2.582383819940659, "grad_norm": 0.10079415538372968, "learning_rate": 9.89458187498466e-05, "loss": 0.8427, "step": 147090 }, { "epoch": 2.5825593848206605, "grad_norm": 0.08176275764383532, "learning_rate": 9.893467752935331e-05, "loss": 0.8386, "step": 147100 }, { "epoch": 2.582734949700662, "grad_norm": 0.08027292849750255, "learning_rate": 9.892353633502808e-05, "loss": 0.8444, "step": 147110 }, { "epoch": 2.5829105145806635, "grad_norm": 0.07872449391987432, "learning_rate": 9.891239516701203e-05, "loss": 0.841, "step": 147120 }, { "epoch": 2.5830860794606645, "grad_norm": 0.07268053572640772, "learning_rate": 9.890125402544629e-05, "loss": 0.85, "step": 147130 }, { "epoch": 2.583261644340666, "grad_norm": 0.05088040697868812, "learning_rate": 9.889011291047209e-05, "loss": 0.8442, "step": 147140 }, { "epoch": 2.5834372092206674, "grad_norm": 0.0542506736941002, "learning_rate": 9.887897182223058e-05, "loss": 0.8424, "step": 147150 }, { "epoch": 2.583612774100669, "grad_norm": 0.04946359346567688, "learning_rate": 9.886783076086288e-05, "loss": 0.8471, "step": 147160 }, { "epoch": 2.5837883389806704, "grad_norm": 0.050728259705896624, "learning_rate": 9.88566897265102e-05, "loss": 0.8468, "step": 147170 }, { "epoch": 2.583963903860672, "grad_norm": 0.055829763616227705, "learning_rate": 9.884554871931367e-05, "loss": 0.8373, "step": 147180 }, { "epoch": 2.5841394687406734, "grad_norm": 0.05485295717195287, "learning_rate": 9.883440773941445e-05, "loss": 0.8428, "step": 147190 }, { "epoch": 2.5843150336206744, "grad_norm": 0.05086155268313271, "learning_rate": 9.882326678695365e-05, "loss": 0.8381, "step": 147200 }, { "epoch": 2.584490598500676, "grad_norm": 0.04475620358525788, "learning_rate": 9.881212586207253e-05, "loss": 0.8366, "step": 147210 }, { "epoch": 2.5846661633806773, "grad_norm": 0.0467774506776065, "learning_rate": 9.88009849649122e-05, "loss": 0.848, "step": 147220 }, { "epoch": 2.584841728260679, "grad_norm": 0.062065717656144104, "learning_rate": 9.878984409561376e-05, "loss": 0.8497, "step": 147230 }, { "epoch": 2.5850172931406803, "grad_norm": 0.050966388968865586, "learning_rate": 9.877870325431846e-05, "loss": 0.848, "step": 147240 }, { "epoch": 2.5851928580206813, "grad_norm": 0.07225026952756464, "learning_rate": 9.876756244116742e-05, "loss": 0.8391, "step": 147250 }, { "epoch": 2.585368422900683, "grad_norm": 0.04772454106130492, "learning_rate": 9.87564216563018e-05, "loss": 0.8475, "step": 147260 }, { "epoch": 2.5855439877806843, "grad_norm": 0.057434879090166334, "learning_rate": 9.874528089986269e-05, "loss": 0.8507, "step": 147270 }, { "epoch": 2.5857195526606858, "grad_norm": 0.05472384060991604, "learning_rate": 9.873414017199136e-05, "loss": 0.8476, "step": 147280 }, { "epoch": 2.5858951175406872, "grad_norm": 0.07035272721761722, "learning_rate": 9.872299947282889e-05, "loss": 0.8423, "step": 147290 }, { "epoch": 2.5860706824206887, "grad_norm": 0.06449830045253888, "learning_rate": 9.871185880251642e-05, "loss": 0.8463, "step": 147300 }, { "epoch": 2.58624624730069, "grad_norm": 0.048455860455401734, "learning_rate": 9.870071816119517e-05, "loss": 0.8461, "step": 147310 }, { "epoch": 2.5864218121806912, "grad_norm": 0.05796943927000105, "learning_rate": 9.868957754900628e-05, "loss": 0.8409, "step": 147320 }, { "epoch": 2.5865973770606927, "grad_norm": 0.04478078236577288, "learning_rate": 9.867843696609084e-05, "loss": 0.85, "step": 147330 }, { "epoch": 2.586772941940694, "grad_norm": 0.048662074837266485, "learning_rate": 9.866729641259008e-05, "loss": 0.8509, "step": 147340 }, { "epoch": 2.5869485068206957, "grad_norm": 0.06708825880288484, "learning_rate": 9.865615588864513e-05, "loss": 0.8443, "step": 147350 }, { "epoch": 2.587124071700697, "grad_norm": 0.06655146176009022, "learning_rate": 9.864501539439712e-05, "loss": 0.8456, "step": 147360 }, { "epoch": 2.587299636580698, "grad_norm": 0.06314165800050596, "learning_rate": 9.863387492998719e-05, "loss": 0.849, "step": 147370 }, { "epoch": 2.5874752014606996, "grad_norm": 0.059545518689901905, "learning_rate": 9.862273449555654e-05, "loss": 0.8447, "step": 147380 }, { "epoch": 2.587650766340701, "grad_norm": 0.050097119446454265, "learning_rate": 9.861159409124634e-05, "loss": 0.8541, "step": 147390 }, { "epoch": 2.5878263312207026, "grad_norm": 0.06473059704513145, "learning_rate": 9.860045371719763e-05, "loss": 0.8515, "step": 147400 }, { "epoch": 2.588001896100704, "grad_norm": 0.05314135392610301, "learning_rate": 9.858931337355168e-05, "loss": 0.8494, "step": 147410 }, { "epoch": 2.5881774609807056, "grad_norm": 0.05391444701996537, "learning_rate": 9.857817306044957e-05, "loss": 0.8502, "step": 147420 }, { "epoch": 2.588353025860707, "grad_norm": 0.04853816537706338, "learning_rate": 9.856703277803249e-05, "loss": 0.8456, "step": 147430 }, { "epoch": 2.588528590740708, "grad_norm": 0.06121508391020887, "learning_rate": 9.855589252644156e-05, "loss": 0.8414, "step": 147440 }, { "epoch": 2.5887041556207095, "grad_norm": 0.04437211078803025, "learning_rate": 9.854475230581794e-05, "loss": 0.8339, "step": 147450 }, { "epoch": 2.588879720500711, "grad_norm": 0.07079537428045025, "learning_rate": 9.853361211630278e-05, "loss": 0.8447, "step": 147460 }, { "epoch": 2.5890552853807125, "grad_norm": 0.05458759267848393, "learning_rate": 9.852247195803719e-05, "loss": 0.8484, "step": 147470 }, { "epoch": 2.589230850260714, "grad_norm": 0.048226157971619735, "learning_rate": 9.851133183116241e-05, "loss": 0.8439, "step": 147480 }, { "epoch": 2.589406415140715, "grad_norm": 0.05465416180096269, "learning_rate": 9.85001917358195e-05, "loss": 0.8452, "step": 147490 }, { "epoch": 2.5895819800207165, "grad_norm": 0.058749663987442406, "learning_rate": 9.848905167214964e-05, "loss": 0.8469, "step": 147500 }, { "epoch": 2.589757544900718, "grad_norm": 0.0740518944977814, "learning_rate": 9.8477911640294e-05, "loss": 0.8375, "step": 147510 }, { "epoch": 2.5899331097807194, "grad_norm": 0.05514573478608806, "learning_rate": 9.84667716403937e-05, "loss": 0.8482, "step": 147520 }, { "epoch": 2.590108674660721, "grad_norm": 0.0856644123462127, "learning_rate": 9.84556316725899e-05, "loss": 0.8496, "step": 147530 }, { "epoch": 2.5902842395407224, "grad_norm": 0.05269314385537763, "learning_rate": 9.844449173702368e-05, "loss": 0.8417, "step": 147540 }, { "epoch": 2.590459804420724, "grad_norm": 0.04941095457436785, "learning_rate": 9.843335183383627e-05, "loss": 0.8407, "step": 147550 }, { "epoch": 2.590635369300725, "grad_norm": 0.05950228320757808, "learning_rate": 9.842221196316883e-05, "loss": 0.8409, "step": 147560 }, { "epoch": 2.5908109341807264, "grad_norm": 0.050507856740222803, "learning_rate": 9.841107212516239e-05, "loss": 0.8404, "step": 147570 }, { "epoch": 2.590986499060728, "grad_norm": 0.04948154635320294, "learning_rate": 9.839993231995819e-05, "loss": 0.8459, "step": 147580 }, { "epoch": 2.5911620639407293, "grad_norm": 0.04823070699373961, "learning_rate": 9.838879254769735e-05, "loss": 0.8426, "step": 147590 }, { "epoch": 2.591337628820731, "grad_norm": 0.04937029494831054, "learning_rate": 9.837765280852101e-05, "loss": 0.8472, "step": 147600 }, { "epoch": 2.591513193700732, "grad_norm": 0.05753203884437324, "learning_rate": 9.83665131025703e-05, "loss": 0.8454, "step": 147610 }, { "epoch": 2.5916887585807338, "grad_norm": 0.06463382112193573, "learning_rate": 9.835537342998639e-05, "loss": 0.8493, "step": 147620 }, { "epoch": 2.591864323460735, "grad_norm": 0.058510801296406456, "learning_rate": 9.834423379091038e-05, "loss": 0.8376, "step": 147630 }, { "epoch": 2.5920398883407363, "grad_norm": 0.058895582633066774, "learning_rate": 9.833309418548344e-05, "loss": 0.845, "step": 147640 }, { "epoch": 2.5922154532207378, "grad_norm": 0.057129868886500686, "learning_rate": 9.832195461384672e-05, "loss": 0.8498, "step": 147650 }, { "epoch": 2.5923910181007392, "grad_norm": 0.05135041684763619, "learning_rate": 9.831081507614136e-05, "loss": 0.8399, "step": 147660 }, { "epoch": 2.5925665829807407, "grad_norm": 0.04862838960195055, "learning_rate": 9.829967557250846e-05, "loss": 0.848, "step": 147670 }, { "epoch": 2.5927421478607418, "grad_norm": 0.046818654094750174, "learning_rate": 9.828853610308921e-05, "loss": 0.8526, "step": 147680 }, { "epoch": 2.5929177127407432, "grad_norm": 0.07216657285794623, "learning_rate": 9.827739666802473e-05, "loss": 0.8433, "step": 147690 }, { "epoch": 2.5930932776207447, "grad_norm": 0.05258533587038273, "learning_rate": 9.826625726745616e-05, "loss": 0.8427, "step": 147700 }, { "epoch": 2.593268842500746, "grad_norm": 0.06267471533086195, "learning_rate": 9.825511790152459e-05, "loss": 0.8451, "step": 147710 }, { "epoch": 2.5934444073807477, "grad_norm": 0.058913777732563906, "learning_rate": 9.824397857037123e-05, "loss": 0.8502, "step": 147720 }, { "epoch": 2.5936199722607487, "grad_norm": 0.0985323161287128, "learning_rate": 9.823283927413724e-05, "loss": 0.8398, "step": 147730 }, { "epoch": 2.5937955371407506, "grad_norm": 0.0591748145872972, "learning_rate": 9.822170001296362e-05, "loss": 0.8416, "step": 147740 }, { "epoch": 2.5939711020207517, "grad_norm": 0.048353865559411326, "learning_rate": 9.821056078699163e-05, "loss": 0.8433, "step": 147750 }, { "epoch": 2.594146666900753, "grad_norm": 0.05181039510104841, "learning_rate": 9.819942159636238e-05, "loss": 0.8448, "step": 147760 }, { "epoch": 2.5943222317807546, "grad_norm": 0.05081592328824589, "learning_rate": 9.818828244121701e-05, "loss": 0.8488, "step": 147770 }, { "epoch": 2.594497796660756, "grad_norm": 0.06100357490359965, "learning_rate": 9.81771433216966e-05, "loss": 0.8468, "step": 147780 }, { "epoch": 2.5946733615407576, "grad_norm": 0.06202976605167687, "learning_rate": 9.816600423794235e-05, "loss": 0.8389, "step": 147790 }, { "epoch": 2.5948489264207586, "grad_norm": 0.06234411669547329, "learning_rate": 9.815486519009537e-05, "loss": 0.8576, "step": 147800 }, { "epoch": 2.59502449130076, "grad_norm": 0.06452239213985769, "learning_rate": 9.814372617829675e-05, "loss": 0.8406, "step": 147810 }, { "epoch": 2.5952000561807615, "grad_norm": 0.08223888370568902, "learning_rate": 9.813258720268772e-05, "loss": 0.8406, "step": 147820 }, { "epoch": 2.595375621060763, "grad_norm": 0.05999489348770206, "learning_rate": 9.812144826340935e-05, "loss": 0.8419, "step": 147830 }, { "epoch": 2.5955511859407645, "grad_norm": 0.07512940446740234, "learning_rate": 9.811030936060276e-05, "loss": 0.8422, "step": 147840 }, { "epoch": 2.5957267508207655, "grad_norm": 0.051881934186704934, "learning_rate": 9.809917049440913e-05, "loss": 0.843, "step": 147850 }, { "epoch": 2.5959023157007675, "grad_norm": 0.05858031966936865, "learning_rate": 9.808803166496956e-05, "loss": 0.843, "step": 147860 }, { "epoch": 2.5960778805807685, "grad_norm": 0.06454072851231737, "learning_rate": 9.807689287242518e-05, "loss": 0.8481, "step": 147870 }, { "epoch": 2.59625344546077, "grad_norm": 0.05290184040292287, "learning_rate": 9.806575411691711e-05, "loss": 0.8339, "step": 147880 }, { "epoch": 2.5964290103407714, "grad_norm": 0.06573344261943087, "learning_rate": 9.805461539858652e-05, "loss": 0.838, "step": 147890 }, { "epoch": 2.596604575220773, "grad_norm": 0.05216866014009823, "learning_rate": 9.804347671757455e-05, "loss": 0.8402, "step": 147900 }, { "epoch": 2.5967801401007744, "grad_norm": 0.06078928904316723, "learning_rate": 9.803233807402221e-05, "loss": 0.8389, "step": 147910 }, { "epoch": 2.5969557049807754, "grad_norm": 0.05936017588007188, "learning_rate": 9.802119946807079e-05, "loss": 0.8422, "step": 147920 }, { "epoch": 2.597131269860777, "grad_norm": 0.06610648339368014, "learning_rate": 9.801006089986132e-05, "loss": 0.8435, "step": 147930 }, { "epoch": 2.5973068347407784, "grad_norm": 0.05516239202311649, "learning_rate": 9.799892236953496e-05, "loss": 0.8456, "step": 147940 }, { "epoch": 2.59748239962078, "grad_norm": 0.05530881850135712, "learning_rate": 9.798778387723281e-05, "loss": 0.8483, "step": 147950 }, { "epoch": 2.5976579645007813, "grad_norm": 0.07090014561902151, "learning_rate": 9.797664542309603e-05, "loss": 0.851, "step": 147960 }, { "epoch": 2.597833529380783, "grad_norm": 0.05371929783575964, "learning_rate": 9.796550700726575e-05, "loss": 0.8383, "step": 147970 }, { "epoch": 2.5980090942607843, "grad_norm": 0.06557406484698074, "learning_rate": 9.795436862988301e-05, "loss": 0.844, "step": 147980 }, { "epoch": 2.5981846591407853, "grad_norm": 0.04401152068852839, "learning_rate": 9.794323029108906e-05, "loss": 0.8409, "step": 147990 }, { "epoch": 2.598360224020787, "grad_norm": 0.04878131381248311, "learning_rate": 9.793209199102496e-05, "loss": 0.846, "step": 148000 }, { "epoch": 2.5985357889007883, "grad_norm": 0.051657241136799946, "learning_rate": 9.792095372983182e-05, "loss": 0.845, "step": 148010 }, { "epoch": 2.5987113537807898, "grad_norm": 0.07040866577279255, "learning_rate": 9.790981550765082e-05, "loss": 0.8388, "step": 148020 }, { "epoch": 2.5988869186607912, "grad_norm": 0.05485597771308122, "learning_rate": 9.789867732462303e-05, "loss": 0.8396, "step": 148030 }, { "epoch": 2.5990624835407923, "grad_norm": 0.05379423769304707, "learning_rate": 9.788753918088961e-05, "loss": 0.837, "step": 148040 }, { "epoch": 2.5992380484207938, "grad_norm": 0.07001354579574294, "learning_rate": 9.787640107659161e-05, "loss": 0.8328, "step": 148050 }, { "epoch": 2.5994136133007952, "grad_norm": 0.05009994280985529, "learning_rate": 9.786526301187028e-05, "loss": 0.8482, "step": 148060 }, { "epoch": 2.5995891781807967, "grad_norm": 0.07006126651661213, "learning_rate": 9.785412498686667e-05, "loss": 0.8429, "step": 148070 }, { "epoch": 2.599764743060798, "grad_norm": 0.04711642015508274, "learning_rate": 9.784298700172182e-05, "loss": 0.8449, "step": 148080 }, { "epoch": 2.5999403079407997, "grad_norm": 0.04468429688079152, "learning_rate": 9.783184905657699e-05, "loss": 0.8406, "step": 148090 }, { "epoch": 2.600115872820801, "grad_norm": 0.04941276705293672, "learning_rate": 9.782071115157324e-05, "loss": 0.8409, "step": 148100 }, { "epoch": 2.600291437700802, "grad_norm": 0.06808846627379002, "learning_rate": 9.780957328685169e-05, "loss": 0.8392, "step": 148110 }, { "epoch": 2.6004670025808037, "grad_norm": 0.06596324553933017, "learning_rate": 9.779843546255343e-05, "loss": 0.8493, "step": 148120 }, { "epoch": 2.600642567460805, "grad_norm": 0.05818817958491927, "learning_rate": 9.778729767881961e-05, "loss": 0.8337, "step": 148130 }, { "epoch": 2.6008181323408066, "grad_norm": 0.04987284966226184, "learning_rate": 9.777615993579137e-05, "loss": 0.8414, "step": 148140 }, { "epoch": 2.600993697220808, "grad_norm": 0.06169261766583526, "learning_rate": 9.776502223360977e-05, "loss": 0.8445, "step": 148150 }, { "epoch": 2.601169262100809, "grad_norm": 0.05056205358891741, "learning_rate": 9.775388457241596e-05, "loss": 0.8423, "step": 148160 }, { "epoch": 2.6013448269808106, "grad_norm": 0.054912617720560566, "learning_rate": 9.774274695235109e-05, "loss": 0.8386, "step": 148170 }, { "epoch": 2.601520391860812, "grad_norm": 0.05911986632084129, "learning_rate": 9.77316093735562e-05, "loss": 0.8458, "step": 148180 }, { "epoch": 2.6016959567408136, "grad_norm": 0.054224299966651957, "learning_rate": 9.772047183617248e-05, "loss": 0.8389, "step": 148190 }, { "epoch": 2.601871521620815, "grad_norm": 0.08677448777445855, "learning_rate": 9.7709334340341e-05, "loss": 0.841, "step": 148200 }, { "epoch": 2.6020470865008165, "grad_norm": 0.07499928057974105, "learning_rate": 9.769819688620287e-05, "loss": 0.8386, "step": 148210 }, { "epoch": 2.602222651380818, "grad_norm": 0.05566648680345528, "learning_rate": 9.76870594738992e-05, "loss": 0.8454, "step": 148220 }, { "epoch": 2.602398216260819, "grad_norm": 0.05643924788458931, "learning_rate": 9.767592210357115e-05, "loss": 0.8481, "step": 148230 }, { "epoch": 2.6025737811408205, "grad_norm": 0.05671698263214561, "learning_rate": 9.766478477535984e-05, "loss": 0.8484, "step": 148240 }, { "epoch": 2.602749346020822, "grad_norm": 0.04914611743769659, "learning_rate": 9.765364748940627e-05, "loss": 0.8473, "step": 148250 }, { "epoch": 2.6029249109008235, "grad_norm": 0.05484894128136698, "learning_rate": 9.764251024585169e-05, "loss": 0.8421, "step": 148260 }, { "epoch": 2.603100475780825, "grad_norm": 0.05236877720333318, "learning_rate": 9.763137304483713e-05, "loss": 0.846, "step": 148270 }, { "epoch": 2.603276040660826, "grad_norm": 0.06072420547956174, "learning_rate": 9.762023588650372e-05, "loss": 0.839, "step": 148280 }, { "epoch": 2.603451605540828, "grad_norm": 0.04361133423246296, "learning_rate": 9.760909877099254e-05, "loss": 0.8461, "step": 148290 }, { "epoch": 2.603627170420829, "grad_norm": 0.0445805762939854, "learning_rate": 9.759796169844478e-05, "loss": 0.8418, "step": 148300 }, { "epoch": 2.6038027353008304, "grad_norm": 0.04898657118516653, "learning_rate": 9.758682466900146e-05, "loss": 0.8436, "step": 148310 }, { "epoch": 2.603978300180832, "grad_norm": 0.0580903403959444, "learning_rate": 9.75756876828037e-05, "loss": 0.8378, "step": 148320 }, { "epoch": 2.6041538650608334, "grad_norm": 0.06518444350154497, "learning_rate": 9.756455073999268e-05, "loss": 0.8558, "step": 148330 }, { "epoch": 2.604329429940835, "grad_norm": 0.05110578587179342, "learning_rate": 9.755341384070946e-05, "loss": 0.8447, "step": 148340 }, { "epoch": 2.604504994820836, "grad_norm": 0.05095964823622443, "learning_rate": 9.754227698509512e-05, "loss": 0.8367, "step": 148350 }, { "epoch": 2.6046805597008373, "grad_norm": 0.0518030373832241, "learning_rate": 9.753114017329082e-05, "loss": 0.8398, "step": 148360 }, { "epoch": 2.604856124580839, "grad_norm": 0.06351048006430467, "learning_rate": 9.752000340543765e-05, "loss": 0.8465, "step": 148370 }, { "epoch": 2.6050316894608403, "grad_norm": 0.05398249659584223, "learning_rate": 9.750886668167668e-05, "loss": 0.8508, "step": 148380 }, { "epoch": 2.6052072543408418, "grad_norm": 0.06688135161491413, "learning_rate": 9.7497730002149e-05, "loss": 0.8431, "step": 148390 }, { "epoch": 2.605382819220843, "grad_norm": 0.045836521170116704, "learning_rate": 9.748659336699579e-05, "loss": 0.8393, "step": 148400 }, { "epoch": 2.6055583841008447, "grad_norm": 0.0668292468147587, "learning_rate": 9.747545677635815e-05, "loss": 0.8374, "step": 148410 }, { "epoch": 2.6057339489808458, "grad_norm": 0.06950155121043203, "learning_rate": 9.746432023037709e-05, "loss": 0.8474, "step": 148420 }, { "epoch": 2.6059095138608472, "grad_norm": 0.05780111044925125, "learning_rate": 9.74531837291938e-05, "loss": 0.8435, "step": 148430 }, { "epoch": 2.6060850787408487, "grad_norm": 0.05202676176255151, "learning_rate": 9.744204727294936e-05, "loss": 0.8451, "step": 148440 }, { "epoch": 2.60626064362085, "grad_norm": 0.05076461741091999, "learning_rate": 9.743091086178486e-05, "loss": 0.8432, "step": 148450 }, { "epoch": 2.6064362085008517, "grad_norm": 0.0856210307720876, "learning_rate": 9.741977449584137e-05, "loss": 0.8386, "step": 148460 }, { "epoch": 2.6066117733808527, "grad_norm": 0.09024446301937361, "learning_rate": 9.740863817526006e-05, "loss": 0.8451, "step": 148470 }, { "epoch": 2.606787338260854, "grad_norm": 0.06889585343774948, "learning_rate": 9.7397501900182e-05, "loss": 0.8444, "step": 148480 }, { "epoch": 2.6069629031408557, "grad_norm": 0.050642845654905635, "learning_rate": 9.738636567074822e-05, "loss": 0.8434, "step": 148490 }, { "epoch": 2.607138468020857, "grad_norm": 0.05199152864105722, "learning_rate": 9.737522948709993e-05, "loss": 0.8373, "step": 148500 }, { "epoch": 2.6073140329008586, "grad_norm": 0.05818131506435347, "learning_rate": 9.736409334937817e-05, "loss": 0.8449, "step": 148510 }, { "epoch": 2.6074895977808596, "grad_norm": 0.104043923644471, "learning_rate": 9.735295725772403e-05, "loss": 0.8446, "step": 148520 }, { "epoch": 2.6076651626608616, "grad_norm": 0.04754962112220924, "learning_rate": 9.734182121227865e-05, "loss": 0.8493, "step": 148530 }, { "epoch": 2.6078407275408626, "grad_norm": 0.07218904838363353, "learning_rate": 9.733068521318309e-05, "loss": 0.8439, "step": 148540 }, { "epoch": 2.608016292420864, "grad_norm": 0.05577658443820227, "learning_rate": 9.731954926057843e-05, "loss": 0.8444, "step": 148550 }, { "epoch": 2.6081918573008656, "grad_norm": 0.05330034014568891, "learning_rate": 9.730841335460576e-05, "loss": 0.846, "step": 148560 }, { "epoch": 2.608367422180867, "grad_norm": 0.0709464335160026, "learning_rate": 9.729727749540625e-05, "loss": 0.8414, "step": 148570 }, { "epoch": 2.6085429870608685, "grad_norm": 0.04605868413401664, "learning_rate": 9.728614168312097e-05, "loss": 0.8484, "step": 148580 }, { "epoch": 2.6087185519408695, "grad_norm": 0.07649715417571938, "learning_rate": 9.72750059178909e-05, "loss": 0.8498, "step": 148590 }, { "epoch": 2.608894116820871, "grad_norm": 0.05506703025058585, "learning_rate": 9.726387019985729e-05, "loss": 0.8565, "step": 148600 }, { "epoch": 2.6090696817008725, "grad_norm": 0.047689529500663096, "learning_rate": 9.725273452916112e-05, "loss": 0.8349, "step": 148610 }, { "epoch": 2.609245246580874, "grad_norm": 0.06589454659922182, "learning_rate": 9.724159890594356e-05, "loss": 0.8379, "step": 148620 }, { "epoch": 2.6094208114608755, "grad_norm": 0.04737852614201036, "learning_rate": 9.723046333034562e-05, "loss": 0.8453, "step": 148630 }, { "epoch": 2.609596376340877, "grad_norm": 0.06260414369602472, "learning_rate": 9.721932780250846e-05, "loss": 0.8416, "step": 148640 }, { "epoch": 2.6097719412208784, "grad_norm": 0.04487099986055861, "learning_rate": 9.720819232257315e-05, "loss": 0.8444, "step": 148650 }, { "epoch": 2.6099475061008794, "grad_norm": 0.06605675306595325, "learning_rate": 9.719705689068072e-05, "loss": 0.8488, "step": 148660 }, { "epoch": 2.610123070980881, "grad_norm": 0.052495614480569136, "learning_rate": 9.718592150697234e-05, "loss": 0.845, "step": 148670 }, { "epoch": 2.6102986358608824, "grad_norm": 0.04494260050418276, "learning_rate": 9.71747861715891e-05, "loss": 0.8403, "step": 148680 }, { "epoch": 2.610474200740884, "grad_norm": 0.05254935338264068, "learning_rate": 9.716365088467198e-05, "loss": 0.839, "step": 148690 }, { "epoch": 2.6106497656208854, "grad_norm": 0.08301725609118399, "learning_rate": 9.715251564636219e-05, "loss": 0.8437, "step": 148700 }, { "epoch": 2.6108253305008864, "grad_norm": 0.07827085311437729, "learning_rate": 9.714138045680077e-05, "loss": 0.8556, "step": 148710 }, { "epoch": 2.611000895380888, "grad_norm": 0.048498017735527664, "learning_rate": 9.713024531612879e-05, "loss": 0.8411, "step": 148720 }, { "epoch": 2.6111764602608893, "grad_norm": 0.05386358688663874, "learning_rate": 9.71191102244873e-05, "loss": 0.8513, "step": 148730 }, { "epoch": 2.611352025140891, "grad_norm": 0.0431439081703766, "learning_rate": 9.710797518201747e-05, "loss": 0.8487, "step": 148740 }, { "epoch": 2.6115275900208923, "grad_norm": 0.05590396130367564, "learning_rate": 9.709684018886036e-05, "loss": 0.8319, "step": 148750 }, { "epoch": 2.6117031549008938, "grad_norm": 0.06788566502568791, "learning_rate": 9.708570524515697e-05, "loss": 0.8407, "step": 148760 }, { "epoch": 2.6118787197808953, "grad_norm": 0.05651921240583665, "learning_rate": 9.70745703510485e-05, "loss": 0.8456, "step": 148770 }, { "epoch": 2.6120542846608963, "grad_norm": 0.04944660496525395, "learning_rate": 9.706343550667596e-05, "loss": 0.8487, "step": 148780 }, { "epoch": 2.6122298495408978, "grad_norm": 0.06052856477628428, "learning_rate": 9.705230071218045e-05, "loss": 0.8434, "step": 148790 }, { "epoch": 2.6124054144208992, "grad_norm": 0.04323274399365831, "learning_rate": 9.704116596770304e-05, "loss": 0.8408, "step": 148800 }, { "epoch": 2.6125809793009007, "grad_norm": 0.04717051804950489, "learning_rate": 9.70300312733848e-05, "loss": 0.8432, "step": 148810 }, { "epoch": 2.612756544180902, "grad_norm": 0.05747838920389176, "learning_rate": 9.701889662936685e-05, "loss": 0.8413, "step": 148820 }, { "epoch": 2.6129321090609032, "grad_norm": 0.05291222122754614, "learning_rate": 9.70077620357902e-05, "loss": 0.8394, "step": 148830 }, { "epoch": 2.6131076739409047, "grad_norm": 0.05333152286896178, "learning_rate": 9.699662749279601e-05, "loss": 0.8437, "step": 148840 }, { "epoch": 2.613283238820906, "grad_norm": 0.056449483987345055, "learning_rate": 9.698549300052531e-05, "loss": 0.84, "step": 148850 }, { "epoch": 2.6134588037009077, "grad_norm": 0.056425295251929124, "learning_rate": 9.697435855911917e-05, "loss": 0.8513, "step": 148860 }, { "epoch": 2.613634368580909, "grad_norm": 0.05992418870810401, "learning_rate": 9.696322416871872e-05, "loss": 0.8493, "step": 148870 }, { "epoch": 2.6138099334609106, "grad_norm": 0.05400688067172821, "learning_rate": 9.695208982946495e-05, "loss": 0.8359, "step": 148880 }, { "epoch": 2.613985498340912, "grad_norm": 0.04912363729480335, "learning_rate": 9.694095554149901e-05, "loss": 0.8433, "step": 148890 }, { "epoch": 2.614161063220913, "grad_norm": 0.05642845369998292, "learning_rate": 9.692982130496189e-05, "loss": 0.8375, "step": 148900 }, { "epoch": 2.6143366281009146, "grad_norm": 0.057219470030742424, "learning_rate": 9.691868711999476e-05, "loss": 0.8412, "step": 148910 }, { "epoch": 2.614512192980916, "grad_norm": 0.05936441481125335, "learning_rate": 9.690755298673866e-05, "loss": 0.841, "step": 148920 }, { "epoch": 2.6146877578609176, "grad_norm": 0.051039809484219024, "learning_rate": 9.689641890533459e-05, "loss": 0.8432, "step": 148930 }, { "epoch": 2.614863322740919, "grad_norm": 0.04557116494351116, "learning_rate": 9.688528487592372e-05, "loss": 0.8522, "step": 148940 }, { "epoch": 2.61503888762092, "grad_norm": 0.05211617273327291, "learning_rate": 9.687415089864707e-05, "loss": 0.8407, "step": 148950 }, { "epoch": 2.6152144525009215, "grad_norm": 0.05548170093340263, "learning_rate": 9.686301697364573e-05, "loss": 0.8427, "step": 148960 }, { "epoch": 2.615390017380923, "grad_norm": 0.06847118055815275, "learning_rate": 9.685188310106073e-05, "loss": 0.8413, "step": 148970 }, { "epoch": 2.6155655822609245, "grad_norm": 0.08072311061194068, "learning_rate": 9.684074928103319e-05, "loss": 0.836, "step": 148980 }, { "epoch": 2.615741147140926, "grad_norm": 0.0632325503496503, "learning_rate": 9.682961551370415e-05, "loss": 0.8418, "step": 148990 }, { "epoch": 2.6159167120209275, "grad_norm": 0.04135816947972696, "learning_rate": 9.681848179921464e-05, "loss": 0.8457, "step": 149000 }, { "epoch": 2.616092276900929, "grad_norm": 0.0532048371229542, "learning_rate": 9.680734813770582e-05, "loss": 0.8402, "step": 149010 }, { "epoch": 2.61626784178093, "grad_norm": 0.05285669322215486, "learning_rate": 9.67962145293187e-05, "loss": 0.836, "step": 149020 }, { "epoch": 2.6164434066609314, "grad_norm": 0.09505545557798047, "learning_rate": 9.678508097419433e-05, "loss": 0.8449, "step": 149030 }, { "epoch": 2.616618971540933, "grad_norm": 0.05381000512369598, "learning_rate": 9.677394747247379e-05, "loss": 0.8398, "step": 149040 }, { "epoch": 2.6167945364209344, "grad_norm": 0.04500362958014994, "learning_rate": 9.676281402429817e-05, "loss": 0.8482, "step": 149050 }, { "epoch": 2.616970101300936, "grad_norm": 0.08531100218193165, "learning_rate": 9.675168062980849e-05, "loss": 0.8441, "step": 149060 }, { "epoch": 2.617145666180937, "grad_norm": 0.04890135908927714, "learning_rate": 9.674054728914579e-05, "loss": 0.8511, "step": 149070 }, { "epoch": 2.617321231060939, "grad_norm": 0.054678410601242705, "learning_rate": 9.672941400245122e-05, "loss": 0.8357, "step": 149080 }, { "epoch": 2.61749679594094, "grad_norm": 0.04978985577691709, "learning_rate": 9.67182807698658e-05, "loss": 0.8408, "step": 149090 }, { "epoch": 2.6176723608209413, "grad_norm": 0.05378695151685628, "learning_rate": 9.670714759153056e-05, "loss": 0.8408, "step": 149100 }, { "epoch": 2.617847925700943, "grad_norm": 0.05405855527018492, "learning_rate": 9.669601446758659e-05, "loss": 0.84, "step": 149110 }, { "epoch": 2.6180234905809443, "grad_norm": 0.048510827612217054, "learning_rate": 9.668488139817496e-05, "loss": 0.8454, "step": 149120 }, { "epoch": 2.618199055460946, "grad_norm": 0.04675100446415186, "learning_rate": 9.667374838343668e-05, "loss": 0.8383, "step": 149130 }, { "epoch": 2.618374620340947, "grad_norm": 0.05401832624829268, "learning_rate": 9.666261542351285e-05, "loss": 0.8432, "step": 149140 }, { "epoch": 2.6185501852209483, "grad_norm": 0.042931441195958084, "learning_rate": 9.665148251854452e-05, "loss": 0.843, "step": 149150 }, { "epoch": 2.6187257501009498, "grad_norm": 0.046612459582340095, "learning_rate": 9.664034966867273e-05, "loss": 0.8429, "step": 149160 }, { "epoch": 2.6189013149809512, "grad_norm": 0.05229556015044999, "learning_rate": 9.662921687403853e-05, "loss": 0.8478, "step": 149170 }, { "epoch": 2.6190768798609527, "grad_norm": 0.038333504747604684, "learning_rate": 9.661808413478302e-05, "loss": 0.846, "step": 149180 }, { "epoch": 2.6192524447409538, "grad_norm": 0.041494572761358206, "learning_rate": 9.660695145104721e-05, "loss": 0.8372, "step": 149190 }, { "epoch": 2.6194280096209557, "grad_norm": 0.05760031269854195, "learning_rate": 9.659581882297216e-05, "loss": 0.8492, "step": 149200 }, { "epoch": 2.6196035745009567, "grad_norm": 0.06922416803801278, "learning_rate": 9.658468625069895e-05, "loss": 0.8495, "step": 149210 }, { "epoch": 2.619779139380958, "grad_norm": 0.05868728444926621, "learning_rate": 9.657355373436862e-05, "loss": 0.8407, "step": 149220 }, { "epoch": 2.6199547042609597, "grad_norm": 0.0528337708505043, "learning_rate": 9.656242127412218e-05, "loss": 0.8479, "step": 149230 }, { "epoch": 2.620130269140961, "grad_norm": 0.07178368285549026, "learning_rate": 9.65512888701007e-05, "loss": 0.8409, "step": 149240 }, { "epoch": 2.6203058340209626, "grad_norm": 0.05137993523324375, "learning_rate": 9.654015652244527e-05, "loss": 0.8397, "step": 149250 }, { "epoch": 2.6204813989009637, "grad_norm": 0.0564926233076178, "learning_rate": 9.652902423129694e-05, "loss": 0.8358, "step": 149260 }, { "epoch": 2.620656963780965, "grad_norm": 0.06161737915464768, "learning_rate": 9.651789199679667e-05, "loss": 0.848, "step": 149270 }, { "epoch": 2.6208325286609666, "grad_norm": 0.056535459949189425, "learning_rate": 9.650675981908561e-05, "loss": 0.8376, "step": 149280 }, { "epoch": 2.621008093540968, "grad_norm": 0.05489480689404442, "learning_rate": 9.649562769830475e-05, "loss": 0.8488, "step": 149290 }, { "epoch": 2.6211836584209696, "grad_norm": 0.05544587426793913, "learning_rate": 9.648449563459517e-05, "loss": 0.8436, "step": 149300 }, { "epoch": 2.6213592233009706, "grad_norm": 0.049120120833366705, "learning_rate": 9.647336362809787e-05, "loss": 0.8451, "step": 149310 }, { "epoch": 2.6215347881809725, "grad_norm": 0.054385955676251284, "learning_rate": 9.646223167895393e-05, "loss": 0.8473, "step": 149320 }, { "epoch": 2.6217103530609736, "grad_norm": 0.0437482921435556, "learning_rate": 9.645109978730438e-05, "loss": 0.8396, "step": 149330 }, { "epoch": 2.621885917940975, "grad_norm": 0.06544402391853625, "learning_rate": 9.643996795329025e-05, "loss": 0.8423, "step": 149340 }, { "epoch": 2.6220614828209765, "grad_norm": 0.07954463741373206, "learning_rate": 9.642883617705263e-05, "loss": 0.842, "step": 149350 }, { "epoch": 2.622237047700978, "grad_norm": 0.045045171361275324, "learning_rate": 9.641770445873251e-05, "loss": 0.8411, "step": 149360 }, { "epoch": 2.6224126125809795, "grad_norm": 0.06386724070672639, "learning_rate": 9.640657279847097e-05, "loss": 0.8503, "step": 149370 }, { "epoch": 2.6225881774609805, "grad_norm": 0.04117305832466428, "learning_rate": 9.639544119640902e-05, "loss": 0.8451, "step": 149380 }, { "epoch": 2.622763742340982, "grad_norm": 0.047621512098159416, "learning_rate": 9.638430965268773e-05, "loss": 0.8353, "step": 149390 }, { "epoch": 2.6229393072209835, "grad_norm": 0.05618669379975567, "learning_rate": 9.637317816744813e-05, "loss": 0.843, "step": 149400 }, { "epoch": 2.623114872100985, "grad_norm": 0.06571374184535919, "learning_rate": 9.636204674083119e-05, "loss": 0.8478, "step": 149410 }, { "epoch": 2.6232904369809864, "grad_norm": 0.06583888023747694, "learning_rate": 9.635091537297806e-05, "loss": 0.8505, "step": 149420 }, { "epoch": 2.623466001860988, "grad_norm": 0.05615538819905938, "learning_rate": 9.633978406402974e-05, "loss": 0.8426, "step": 149430 }, { "epoch": 2.6236415667409894, "grad_norm": 0.06280151972513157, "learning_rate": 9.632865281412717e-05, "loss": 0.8421, "step": 149440 }, { "epoch": 2.6238171316209904, "grad_norm": 0.058037472999262645, "learning_rate": 9.631752162341153e-05, "loss": 0.8408, "step": 149450 }, { "epoch": 2.623992696500992, "grad_norm": 0.06247469062795787, "learning_rate": 9.630639049202378e-05, "loss": 0.8459, "step": 149460 }, { "epoch": 2.6241682613809933, "grad_norm": 0.0530914612154189, "learning_rate": 9.629525942010497e-05, "loss": 0.8358, "step": 149470 }, { "epoch": 2.624343826260995, "grad_norm": 0.060116164971770164, "learning_rate": 9.628412840779608e-05, "loss": 0.8487, "step": 149480 }, { "epoch": 2.6245193911409963, "grad_norm": 0.0546875792433196, "learning_rate": 9.627299745523824e-05, "loss": 0.8454, "step": 149490 }, { "epoch": 2.6246949560209973, "grad_norm": 0.06231463811300292, "learning_rate": 9.62618665625724e-05, "loss": 0.8446, "step": 149500 }, { "epoch": 2.624870520900999, "grad_norm": 0.05270711930962299, "learning_rate": 9.625073572993959e-05, "loss": 0.8527, "step": 149510 }, { "epoch": 2.6250460857810003, "grad_norm": 0.06735497566652919, "learning_rate": 9.623960495748092e-05, "loss": 0.8404, "step": 149520 }, { "epoch": 2.6252216506610018, "grad_norm": 0.07296505654996063, "learning_rate": 9.622847424533735e-05, "loss": 0.8432, "step": 149530 }, { "epoch": 2.6253972155410032, "grad_norm": 0.04971465606045532, "learning_rate": 9.62173435936499e-05, "loss": 0.8511, "step": 149540 }, { "epoch": 2.6255727804210047, "grad_norm": 0.052105548510371705, "learning_rate": 9.620621300255966e-05, "loss": 0.8387, "step": 149550 }, { "epoch": 2.625748345301006, "grad_norm": 0.07696174329201198, "learning_rate": 9.619508247220762e-05, "loss": 0.8398, "step": 149560 }, { "epoch": 2.6259239101810072, "grad_norm": 0.061746581082822834, "learning_rate": 9.618395200273479e-05, "loss": 0.8479, "step": 149570 }, { "epoch": 2.6260994750610087, "grad_norm": 0.0599390640265801, "learning_rate": 9.617282159428219e-05, "loss": 0.8432, "step": 149580 }, { "epoch": 2.62627503994101, "grad_norm": 0.05538749639008634, "learning_rate": 9.61616912469909e-05, "loss": 0.8327, "step": 149590 }, { "epoch": 2.6264506048210117, "grad_norm": 0.05995095263015516, "learning_rate": 9.615056096100193e-05, "loss": 0.8426, "step": 149600 }, { "epoch": 2.626626169701013, "grad_norm": 0.0643223055449359, "learning_rate": 9.613943073645621e-05, "loss": 0.8444, "step": 149610 }, { "epoch": 2.626801734581014, "grad_norm": 0.046671137611403936, "learning_rate": 9.612830057349488e-05, "loss": 0.8457, "step": 149620 }, { "epoch": 2.6269772994610157, "grad_norm": 0.06342235335686192, "learning_rate": 9.611717047225892e-05, "loss": 0.8355, "step": 149630 }, { "epoch": 2.627152864341017, "grad_norm": 0.07119294691488533, "learning_rate": 9.610604043288934e-05, "loss": 0.849, "step": 149640 }, { "epoch": 2.6273284292210186, "grad_norm": 0.05549860431141439, "learning_rate": 9.609491045552715e-05, "loss": 0.8361, "step": 149650 }, { "epoch": 2.62750399410102, "grad_norm": 0.06273018250271999, "learning_rate": 9.608378054031342e-05, "loss": 0.8321, "step": 149660 }, { "epoch": 2.6276795589810216, "grad_norm": 0.05137761371207923, "learning_rate": 9.60726506873891e-05, "loss": 0.831, "step": 149670 }, { "epoch": 2.627855123861023, "grad_norm": 0.05174266619529791, "learning_rate": 9.606152089689521e-05, "loss": 0.8403, "step": 149680 }, { "epoch": 2.628030688741024, "grad_norm": 0.052993014047188834, "learning_rate": 9.605039116897285e-05, "loss": 0.8409, "step": 149690 }, { "epoch": 2.6282062536210256, "grad_norm": 0.07247590198152491, "learning_rate": 9.603926150376297e-05, "loss": 0.8474, "step": 149700 }, { "epoch": 2.628381818501027, "grad_norm": 0.06675279973902536, "learning_rate": 9.60281319014066e-05, "loss": 0.8458, "step": 149710 }, { "epoch": 2.6285573833810285, "grad_norm": 0.055002063965730055, "learning_rate": 9.601700236204475e-05, "loss": 0.8429, "step": 149720 }, { "epoch": 2.62873294826103, "grad_norm": 0.05933811094845235, "learning_rate": 9.600587288581844e-05, "loss": 0.8426, "step": 149730 }, { "epoch": 2.628908513141031, "grad_norm": 0.05216756203591894, "learning_rate": 9.599474347286868e-05, "loss": 0.8338, "step": 149740 }, { "epoch": 2.629084078021033, "grad_norm": 0.04579029953303335, "learning_rate": 9.598361412333645e-05, "loss": 0.8417, "step": 149750 }, { "epoch": 2.629259642901034, "grad_norm": 0.050382687607705944, "learning_rate": 9.59724848373628e-05, "loss": 0.8499, "step": 149760 }, { "epoch": 2.6294352077810355, "grad_norm": 0.06520660662691584, "learning_rate": 9.596135561508878e-05, "loss": 0.8454, "step": 149770 }, { "epoch": 2.629610772661037, "grad_norm": 0.052319997823392986, "learning_rate": 9.595022645665527e-05, "loss": 0.8372, "step": 149780 }, { "epoch": 2.6297863375410384, "grad_norm": 0.07419277194970694, "learning_rate": 9.593909736220341e-05, "loss": 0.8428, "step": 149790 }, { "epoch": 2.62996190242104, "grad_norm": 0.06114662177023278, "learning_rate": 9.592796833187415e-05, "loss": 0.845, "step": 149800 }, { "epoch": 2.630137467301041, "grad_norm": 0.05312003594657317, "learning_rate": 9.591683936580851e-05, "loss": 0.834, "step": 149810 }, { "epoch": 2.6303130321810424, "grad_norm": 0.06303346912270143, "learning_rate": 9.590571046414746e-05, "loss": 0.8414, "step": 149820 }, { "epoch": 2.630488597061044, "grad_norm": 0.06963264108001856, "learning_rate": 9.589458162703206e-05, "loss": 0.8418, "step": 149830 }, { "epoch": 2.6306641619410454, "grad_norm": 0.0588116277803903, "learning_rate": 9.58834528546033e-05, "loss": 0.843, "step": 149840 }, { "epoch": 2.630839726821047, "grad_norm": 0.0496700951633619, "learning_rate": 9.587232414700212e-05, "loss": 0.8399, "step": 149850 }, { "epoch": 2.631015291701048, "grad_norm": 0.05341327007339182, "learning_rate": 9.586119550436962e-05, "loss": 0.8452, "step": 149860 }, { "epoch": 2.63119085658105, "grad_norm": 0.052570623509831355, "learning_rate": 9.585006692684675e-05, "loss": 0.8419, "step": 149870 }, { "epoch": 2.631366421461051, "grad_norm": 0.05201961260347382, "learning_rate": 9.583893841457452e-05, "loss": 0.835, "step": 149880 }, { "epoch": 2.6315419863410523, "grad_norm": 0.05518315917118054, "learning_rate": 9.582780996769392e-05, "loss": 0.8471, "step": 149890 }, { "epoch": 2.6317175512210538, "grad_norm": 0.04397449845489029, "learning_rate": 9.581668158634598e-05, "loss": 0.8444, "step": 149900 }, { "epoch": 2.6318931161010553, "grad_norm": 0.05877241156014018, "learning_rate": 9.580555327067168e-05, "loss": 0.8476, "step": 149910 }, { "epoch": 2.6320686809810567, "grad_norm": 0.08117107516217152, "learning_rate": 9.579442502081195e-05, "loss": 0.8437, "step": 149920 }, { "epoch": 2.6322442458610578, "grad_norm": 0.03978691457218288, "learning_rate": 9.578329683690792e-05, "loss": 0.8484, "step": 149930 }, { "epoch": 2.6324198107410592, "grad_norm": 0.055799186216497425, "learning_rate": 9.577216871910053e-05, "loss": 0.8409, "step": 149940 }, { "epoch": 2.6325953756210607, "grad_norm": 0.04217624050796169, "learning_rate": 9.57610406675307e-05, "loss": 0.8425, "step": 149950 }, { "epoch": 2.632770940501062, "grad_norm": 0.08480092748997428, "learning_rate": 9.574991268233954e-05, "loss": 0.8484, "step": 149960 }, { "epoch": 2.6329465053810637, "grad_norm": 0.07004760676745236, "learning_rate": 9.573878476366797e-05, "loss": 0.8454, "step": 149970 }, { "epoch": 2.6331220702610647, "grad_norm": 0.05245658945745399, "learning_rate": 9.572765691165703e-05, "loss": 0.8483, "step": 149980 }, { "epoch": 2.6332976351410666, "grad_norm": 0.05734165641079432, "learning_rate": 9.571652912644766e-05, "loss": 0.8464, "step": 149990 }, { "epoch": 2.6334732000210677, "grad_norm": 0.08623257968528705, "learning_rate": 9.57054014081809e-05, "loss": 0.8393, "step": 150000 }, { "epoch": 2.633648764901069, "grad_norm": 0.05172833227882661, "learning_rate": 9.56942737569977e-05, "loss": 0.8419, "step": 150010 }, { "epoch": 2.6338243297810706, "grad_norm": 0.059653317620287326, "learning_rate": 9.568314617303904e-05, "loss": 0.8407, "step": 150020 }, { "epoch": 2.633999894661072, "grad_norm": 0.04823312227221882, "learning_rate": 9.567201865644598e-05, "loss": 0.8485, "step": 150030 }, { "epoch": 2.6341754595410736, "grad_norm": 0.05560223177884161, "learning_rate": 9.566089120735947e-05, "loss": 0.8412, "step": 150040 }, { "epoch": 2.6343510244210746, "grad_norm": 0.04882396167813291, "learning_rate": 9.564976382592046e-05, "loss": 0.8418, "step": 150050 }, { "epoch": 2.634526589301076, "grad_norm": 0.05273956610736683, "learning_rate": 9.563863651227e-05, "loss": 0.8475, "step": 150060 }, { "epoch": 2.6347021541810776, "grad_norm": 0.06200737151092648, "learning_rate": 9.562750926654903e-05, "loss": 0.8451, "step": 150070 }, { "epoch": 2.634877719061079, "grad_norm": 0.04604065798280388, "learning_rate": 9.561638208889855e-05, "loss": 0.8396, "step": 150080 }, { "epoch": 2.6350532839410805, "grad_norm": 0.04840571911142227, "learning_rate": 9.56052549794595e-05, "loss": 0.8475, "step": 150090 }, { "epoch": 2.635228848821082, "grad_norm": 0.058463661838908755, "learning_rate": 9.559412793837296e-05, "loss": 0.8422, "step": 150100 }, { "epoch": 2.6354044137010835, "grad_norm": 0.04698610385035131, "learning_rate": 9.558300096577986e-05, "loss": 0.8426, "step": 150110 }, { "epoch": 2.6355799785810845, "grad_norm": 0.05143380089026764, "learning_rate": 9.557187406182111e-05, "loss": 0.8491, "step": 150120 }, { "epoch": 2.635755543461086, "grad_norm": 0.044676957125037234, "learning_rate": 9.556074722663781e-05, "loss": 0.8403, "step": 150130 }, { "epoch": 2.6359311083410875, "grad_norm": 0.04627369274367202, "learning_rate": 9.55496204603709e-05, "loss": 0.8456, "step": 150140 }, { "epoch": 2.636106673221089, "grad_norm": 0.07031860346117061, "learning_rate": 9.55384937631613e-05, "loss": 0.8477, "step": 150150 }, { "epoch": 2.6362822381010904, "grad_norm": 0.0754247601965701, "learning_rate": 9.552736713515005e-05, "loss": 0.8328, "step": 150160 }, { "epoch": 2.6364578029810914, "grad_norm": 0.0572978431648883, "learning_rate": 9.551624057647812e-05, "loss": 0.8443, "step": 150170 }, { "epoch": 2.636633367861093, "grad_norm": 0.05646921307797406, "learning_rate": 9.550511408728645e-05, "loss": 0.8376, "step": 150180 }, { "epoch": 2.6368089327410944, "grad_norm": 0.058780056165529324, "learning_rate": 9.5493987667716e-05, "loss": 0.8456, "step": 150190 }, { "epoch": 2.636984497621096, "grad_norm": 0.04702439103919158, "learning_rate": 9.548286131790784e-05, "loss": 0.8413, "step": 150200 }, { "epoch": 2.6371600625010974, "grad_norm": 0.05230596264548972, "learning_rate": 9.547173503800288e-05, "loss": 0.8401, "step": 150210 }, { "epoch": 2.637335627381099, "grad_norm": 0.05964142926476374, "learning_rate": 9.546060882814207e-05, "loss": 0.8423, "step": 150220 }, { "epoch": 2.6375111922611003, "grad_norm": 0.05303576302906718, "learning_rate": 9.544948268846643e-05, "loss": 0.8453, "step": 150230 }, { "epoch": 2.6376867571411013, "grad_norm": 0.05713849963915812, "learning_rate": 9.543835661911692e-05, "loss": 0.8414, "step": 150240 }, { "epoch": 2.637862322021103, "grad_norm": 0.0508317730430756, "learning_rate": 9.542723062023449e-05, "loss": 0.8462, "step": 150250 }, { "epoch": 2.6380378869011043, "grad_norm": 0.06132367381294787, "learning_rate": 9.541610469196007e-05, "loss": 0.8437, "step": 150260 }, { "epoch": 2.638213451781106, "grad_norm": 0.0572479006303145, "learning_rate": 9.540497883443471e-05, "loss": 0.8458, "step": 150270 }, { "epoch": 2.6383890166611073, "grad_norm": 0.04298453043445209, "learning_rate": 9.539385304779938e-05, "loss": 0.8406, "step": 150280 }, { "epoch": 2.6385645815411083, "grad_norm": 0.05035699886809428, "learning_rate": 9.538272733219492e-05, "loss": 0.853, "step": 150290 }, { "epoch": 2.6387401464211098, "grad_norm": 0.05894702343350641, "learning_rate": 9.537160168776243e-05, "loss": 0.8453, "step": 150300 }, { "epoch": 2.6389157113011112, "grad_norm": 0.05512447883790953, "learning_rate": 9.536047611464284e-05, "loss": 0.8447, "step": 150310 }, { "epoch": 2.6390912761811127, "grad_norm": 0.04538804017774835, "learning_rate": 9.53493506129771e-05, "loss": 0.8386, "step": 150320 }, { "epoch": 2.639266841061114, "grad_norm": 0.04689891449628058, "learning_rate": 9.533822518290612e-05, "loss": 0.8432, "step": 150330 }, { "epoch": 2.6394424059411157, "grad_norm": 0.038317179834365604, "learning_rate": 9.532709982457095e-05, "loss": 0.8478, "step": 150340 }, { "epoch": 2.639617970821117, "grad_norm": 0.050098136592777696, "learning_rate": 9.531597453811253e-05, "loss": 0.8519, "step": 150350 }, { "epoch": 2.639793535701118, "grad_norm": 0.04697156669859706, "learning_rate": 9.530484932367174e-05, "loss": 0.8431, "step": 150360 }, { "epoch": 2.6399691005811197, "grad_norm": 0.05872813814427585, "learning_rate": 9.529372418138964e-05, "loss": 0.8446, "step": 150370 }, { "epoch": 2.640144665461121, "grad_norm": 0.054238965058305916, "learning_rate": 9.528259911140717e-05, "loss": 0.845, "step": 150380 }, { "epoch": 2.6403202303411226, "grad_norm": 0.050426372711101536, "learning_rate": 9.527147411386522e-05, "loss": 0.8472, "step": 150390 }, { "epoch": 2.640495795221124, "grad_norm": 0.06484215891855358, "learning_rate": 9.526034918890482e-05, "loss": 0.8316, "step": 150400 }, { "epoch": 2.640671360101125, "grad_norm": 0.051327459712313984, "learning_rate": 9.52492243366669e-05, "loss": 0.8439, "step": 150410 }, { "epoch": 2.640846924981127, "grad_norm": 0.061182362156691684, "learning_rate": 9.523809955729241e-05, "loss": 0.8427, "step": 150420 }, { "epoch": 2.641022489861128, "grad_norm": 0.05237457871344223, "learning_rate": 9.522697485092226e-05, "loss": 0.8454, "step": 150430 }, { "epoch": 2.6411980547411296, "grad_norm": 0.07222364206760126, "learning_rate": 9.521585021769748e-05, "loss": 0.8387, "step": 150440 }, { "epoch": 2.641373619621131, "grad_norm": 0.07468505434274399, "learning_rate": 9.520472565775903e-05, "loss": 0.8441, "step": 150450 }, { "epoch": 2.6415491845011325, "grad_norm": 0.05546983227222983, "learning_rate": 9.519360117124772e-05, "loss": 0.8435, "step": 150460 }, { "epoch": 2.641724749381134, "grad_norm": 0.06564430056741517, "learning_rate": 9.518247675830465e-05, "loss": 0.8356, "step": 150470 }, { "epoch": 2.641900314261135, "grad_norm": 0.06153798422847794, "learning_rate": 9.517135241907072e-05, "loss": 0.8334, "step": 150480 }, { "epoch": 2.6420758791411365, "grad_norm": 0.05261457958447237, "learning_rate": 9.516022815368686e-05, "loss": 0.8518, "step": 150490 }, { "epoch": 2.642251444021138, "grad_norm": 0.040705986598762164, "learning_rate": 9.514910396229402e-05, "loss": 0.843, "step": 150500 }, { "epoch": 2.6424270089011395, "grad_norm": 0.051413937153309, "learning_rate": 9.513797984503318e-05, "loss": 0.8474, "step": 150510 }, { "epoch": 2.642602573781141, "grad_norm": 0.04755823817389561, "learning_rate": 9.512685580204525e-05, "loss": 0.8351, "step": 150520 }, { "epoch": 2.642778138661142, "grad_norm": 0.05195692816767245, "learning_rate": 9.511573183347113e-05, "loss": 0.8458, "step": 150530 }, { "epoch": 2.642953703541144, "grad_norm": 0.048141615807225355, "learning_rate": 9.510460793945188e-05, "loss": 0.8522, "step": 150540 }, { "epoch": 2.643129268421145, "grad_norm": 0.04870337479498233, "learning_rate": 9.509348412012836e-05, "loss": 0.8388, "step": 150550 }, { "epoch": 2.6433048333011464, "grad_norm": 0.0465642536417802, "learning_rate": 9.508236037564151e-05, "loss": 0.8422, "step": 150560 }, { "epoch": 2.643480398181148, "grad_norm": 0.049972029226784564, "learning_rate": 9.507123670613231e-05, "loss": 0.8477, "step": 150570 }, { "epoch": 2.6436559630611494, "grad_norm": 0.06405183027133467, "learning_rate": 9.50601131117417e-05, "loss": 0.8443, "step": 150580 }, { "epoch": 2.643831527941151, "grad_norm": 0.05369390083517443, "learning_rate": 9.504898959261056e-05, "loss": 0.852, "step": 150590 }, { "epoch": 2.644007092821152, "grad_norm": 0.06278608932150226, "learning_rate": 9.503786614887982e-05, "loss": 0.8311, "step": 150600 }, { "epoch": 2.6441826577011533, "grad_norm": 0.06342700372586185, "learning_rate": 9.502674278069053e-05, "loss": 0.8492, "step": 150610 }, { "epoch": 2.644358222581155, "grad_norm": 0.054480674683152396, "learning_rate": 9.501561948818357e-05, "loss": 0.8395, "step": 150620 }, { "epoch": 2.6445337874611563, "grad_norm": 0.05606492596299416, "learning_rate": 9.500449627149979e-05, "loss": 0.84, "step": 150630 }, { "epoch": 2.644709352341158, "grad_norm": 0.05627660239459318, "learning_rate": 9.499337313078021e-05, "loss": 0.8542, "step": 150640 }, { "epoch": 2.644884917221159, "grad_norm": 0.04950510795614886, "learning_rate": 9.498225006616577e-05, "loss": 0.8424, "step": 150650 }, { "epoch": 2.6450604821011607, "grad_norm": 0.044928179431561376, "learning_rate": 9.497112707779738e-05, "loss": 0.8485, "step": 150660 }, { "epoch": 2.6452360469811618, "grad_norm": 0.05786539789038271, "learning_rate": 9.496000416581593e-05, "loss": 0.8405, "step": 150670 }, { "epoch": 2.6454116118611632, "grad_norm": 0.04054334536572059, "learning_rate": 9.49488813303624e-05, "loss": 0.8382, "step": 150680 }, { "epoch": 2.6455871767411647, "grad_norm": 0.06368528938182326, "learning_rate": 9.493775857157771e-05, "loss": 0.8465, "step": 150690 }, { "epoch": 2.645762741621166, "grad_norm": 0.0678491071923747, "learning_rate": 9.492663588960274e-05, "loss": 0.8447, "step": 150700 }, { "epoch": 2.6459383065011677, "grad_norm": 0.05476480735595069, "learning_rate": 9.49155132845785e-05, "loss": 0.8401, "step": 150710 }, { "epoch": 2.6461138713811687, "grad_norm": 0.05206330859401549, "learning_rate": 9.490439075664587e-05, "loss": 0.8424, "step": 150720 }, { "epoch": 2.64628943626117, "grad_norm": 0.05355659043853604, "learning_rate": 9.489326830594575e-05, "loss": 0.8495, "step": 150730 }, { "epoch": 2.6464650011411717, "grad_norm": 0.06367552687603881, "learning_rate": 9.488214593261912e-05, "loss": 0.8473, "step": 150740 }, { "epoch": 2.646640566021173, "grad_norm": 0.0554972288038264, "learning_rate": 9.487102363680687e-05, "loss": 0.8495, "step": 150750 }, { "epoch": 2.6468161309011746, "grad_norm": 0.05469454165470387, "learning_rate": 9.485990141864995e-05, "loss": 0.8458, "step": 150760 }, { "epoch": 2.6469916957811757, "grad_norm": 0.053677430698421714, "learning_rate": 9.484877927828917e-05, "loss": 0.8488, "step": 150770 }, { "epoch": 2.6471672606611776, "grad_norm": 0.04847487908525251, "learning_rate": 9.483765721586563e-05, "loss": 0.8479, "step": 150780 }, { "epoch": 2.6473428255411786, "grad_norm": 0.05069205204478219, "learning_rate": 9.482653523152013e-05, "loss": 0.8421, "step": 150790 }, { "epoch": 2.64751839042118, "grad_norm": 0.05410371853226987, "learning_rate": 9.481541332539356e-05, "loss": 0.8475, "step": 150800 }, { "epoch": 2.6476939553011816, "grad_norm": 0.049412515720492545, "learning_rate": 9.480429149762694e-05, "loss": 0.8496, "step": 150810 }, { "epoch": 2.647869520181183, "grad_norm": 0.04387684302463949, "learning_rate": 9.47931697483611e-05, "loss": 0.8373, "step": 150820 }, { "epoch": 2.6480450850611845, "grad_norm": 0.0707093570397422, "learning_rate": 9.478204807773704e-05, "loss": 0.8415, "step": 150830 }, { "epoch": 2.6482206499411856, "grad_norm": 0.06002975443612054, "learning_rate": 9.477092648589557e-05, "loss": 0.851, "step": 150840 }, { "epoch": 2.648396214821187, "grad_norm": 0.07288038346538545, "learning_rate": 9.47598049729777e-05, "loss": 0.8427, "step": 150850 }, { "epoch": 2.6485717797011885, "grad_norm": 0.057554526768286506, "learning_rate": 9.474868353912426e-05, "loss": 0.8444, "step": 150860 }, { "epoch": 2.64874734458119, "grad_norm": 0.05276667004389696, "learning_rate": 9.473756218447617e-05, "loss": 0.8484, "step": 150870 }, { "epoch": 2.6489229094611915, "grad_norm": 0.04314839841287447, "learning_rate": 9.472644090917443e-05, "loss": 0.8434, "step": 150880 }, { "epoch": 2.649098474341193, "grad_norm": 0.04618779202788762, "learning_rate": 9.471531971335988e-05, "loss": 0.8441, "step": 150890 }, { "epoch": 2.6492740392211944, "grad_norm": 0.061340445781124284, "learning_rate": 9.47041985971734e-05, "loss": 0.8456, "step": 150900 }, { "epoch": 2.6494496041011955, "grad_norm": 0.05526981902758779, "learning_rate": 9.469307756075595e-05, "loss": 0.8432, "step": 150910 }, { "epoch": 2.649625168981197, "grad_norm": 0.07571979196422328, "learning_rate": 9.468195660424842e-05, "loss": 0.8508, "step": 150920 }, { "epoch": 2.6498007338611984, "grad_norm": 0.07213875787355617, "learning_rate": 9.467083572779171e-05, "loss": 0.8461, "step": 150930 }, { "epoch": 2.6499762987412, "grad_norm": 0.07596694955474238, "learning_rate": 9.465971493152669e-05, "loss": 0.8462, "step": 150940 }, { "epoch": 2.6501518636212014, "grad_norm": 0.08149923190433367, "learning_rate": 9.464859421559437e-05, "loss": 0.843, "step": 150950 }, { "epoch": 2.6503274285012024, "grad_norm": 0.0769680022576516, "learning_rate": 9.463747358013554e-05, "loss": 0.8442, "step": 150960 }, { "epoch": 2.650502993381204, "grad_norm": 0.06621862852788316, "learning_rate": 9.462635302529109e-05, "loss": 0.8421, "step": 150970 }, { "epoch": 2.6506785582612054, "grad_norm": 0.05845632533427299, "learning_rate": 9.461523255120201e-05, "loss": 0.8451, "step": 150980 }, { "epoch": 2.650854123141207, "grad_norm": 0.05820433824000859, "learning_rate": 9.460411215800919e-05, "loss": 0.8422, "step": 150990 }, { "epoch": 2.6510296880212083, "grad_norm": 0.04097402836856973, "learning_rate": 9.459299184585347e-05, "loss": 0.8417, "step": 151000 }, { "epoch": 2.65120525290121, "grad_norm": 0.04742844584043897, "learning_rate": 9.458187161487576e-05, "loss": 0.8425, "step": 151010 }, { "epoch": 2.6513808177812113, "grad_norm": 0.08824869893495693, "learning_rate": 9.457075146521697e-05, "loss": 0.8382, "step": 151020 }, { "epoch": 2.6515563826612123, "grad_norm": 0.05574136162049148, "learning_rate": 9.4559631397018e-05, "loss": 0.8423, "step": 151030 }, { "epoch": 2.6517319475412138, "grad_norm": 0.0946649835908488, "learning_rate": 9.45485114104197e-05, "loss": 0.8409, "step": 151040 }, { "epoch": 2.6519075124212153, "grad_norm": 0.0554722735733332, "learning_rate": 9.453739150556303e-05, "loss": 0.8423, "step": 151050 }, { "epoch": 2.6520830773012167, "grad_norm": 0.06274020004140252, "learning_rate": 9.452627168258886e-05, "loss": 0.8491, "step": 151060 }, { "epoch": 2.652258642181218, "grad_norm": 0.05094417001924733, "learning_rate": 9.451515194163802e-05, "loss": 0.8405, "step": 151070 }, { "epoch": 2.6524342070612192, "grad_norm": 0.057749377664201865, "learning_rate": 9.45040322828515e-05, "loss": 0.8451, "step": 151080 }, { "epoch": 2.6526097719412207, "grad_norm": 0.05148488744647607, "learning_rate": 9.449291270637012e-05, "loss": 0.8418, "step": 151090 }, { "epoch": 2.652785336821222, "grad_norm": 0.08033013339787454, "learning_rate": 9.448179321233478e-05, "loss": 0.8359, "step": 151100 }, { "epoch": 2.6529609017012237, "grad_norm": 0.05269473334408509, "learning_rate": 9.447067380088634e-05, "loss": 0.8486, "step": 151110 }, { "epoch": 2.653136466581225, "grad_norm": 0.05246529695522727, "learning_rate": 9.445955447216577e-05, "loss": 0.8405, "step": 151120 }, { "epoch": 2.6533120314612266, "grad_norm": 0.057093226020914406, "learning_rate": 9.444843522631388e-05, "loss": 0.8479, "step": 151130 }, { "epoch": 2.653487596341228, "grad_norm": 0.04953418434989364, "learning_rate": 9.443731606347153e-05, "loss": 0.8431, "step": 151140 }, { "epoch": 2.653663161221229, "grad_norm": 0.04191530354351525, "learning_rate": 9.442619698377966e-05, "loss": 0.8412, "step": 151150 }, { "epoch": 2.6538387261012306, "grad_norm": 0.06184632968300676, "learning_rate": 9.441507798737918e-05, "loss": 0.8428, "step": 151160 }, { "epoch": 2.654014290981232, "grad_norm": 0.04110614335997871, "learning_rate": 9.44039590744109e-05, "loss": 0.8452, "step": 151170 }, { "epoch": 2.6541898558612336, "grad_norm": 0.041830347520999976, "learning_rate": 9.43928402450157e-05, "loss": 0.8474, "step": 151180 }, { "epoch": 2.654365420741235, "grad_norm": 0.05155225076603792, "learning_rate": 9.43817214993345e-05, "loss": 0.8537, "step": 151190 }, { "epoch": 2.654540985621236, "grad_norm": 0.061040070144417424, "learning_rate": 9.437060283750816e-05, "loss": 0.8406, "step": 151200 }, { "epoch": 2.654716550501238, "grad_norm": 0.057920294088537674, "learning_rate": 9.43594842596775e-05, "loss": 0.8499, "step": 151210 }, { "epoch": 2.654892115381239, "grad_norm": 0.0531599741658295, "learning_rate": 9.43483657659835e-05, "loss": 0.8456, "step": 151220 }, { "epoch": 2.6550676802612405, "grad_norm": 0.058628426846472556, "learning_rate": 9.433724735656698e-05, "loss": 0.8469, "step": 151230 }, { "epoch": 2.655243245141242, "grad_norm": 0.057773137961478004, "learning_rate": 9.43261290315688e-05, "loss": 0.8545, "step": 151240 }, { "epoch": 2.6554188100212435, "grad_norm": 0.05383486865127076, "learning_rate": 9.431501079112985e-05, "loss": 0.8429, "step": 151250 }, { "epoch": 2.655594374901245, "grad_norm": 0.07155345968666717, "learning_rate": 9.430389263539101e-05, "loss": 0.8471, "step": 151260 }, { "epoch": 2.655769939781246, "grad_norm": 0.06401494276052641, "learning_rate": 9.429277456449312e-05, "loss": 0.847, "step": 151270 }, { "epoch": 2.6559455046612475, "grad_norm": 0.06836103238995506, "learning_rate": 9.428165657857703e-05, "loss": 0.8379, "step": 151280 }, { "epoch": 2.656121069541249, "grad_norm": 0.050722043682443686, "learning_rate": 9.42705386777837e-05, "loss": 0.8414, "step": 151290 }, { "epoch": 2.6562966344212504, "grad_norm": 0.046982396014335154, "learning_rate": 9.425942086225391e-05, "loss": 0.8469, "step": 151300 }, { "epoch": 2.656472199301252, "grad_norm": 0.06211801086673411, "learning_rate": 9.42483031321285e-05, "loss": 0.8432, "step": 151310 }, { "epoch": 2.656647764181253, "grad_norm": 0.043922380497226625, "learning_rate": 9.423718548754844e-05, "loss": 0.851, "step": 151320 }, { "epoch": 2.656823329061255, "grad_norm": 0.0651496574472913, "learning_rate": 9.422606792865454e-05, "loss": 0.853, "step": 151330 }, { "epoch": 2.656998893941256, "grad_norm": 0.048014259969741284, "learning_rate": 9.421495045558764e-05, "loss": 0.847, "step": 151340 }, { "epoch": 2.6571744588212574, "grad_norm": 0.048519125269346604, "learning_rate": 9.42038330684886e-05, "loss": 0.8406, "step": 151350 }, { "epoch": 2.657350023701259, "grad_norm": 0.06595264218138389, "learning_rate": 9.419271576749833e-05, "loss": 0.8469, "step": 151360 }, { "epoch": 2.6575255885812603, "grad_norm": 0.0662592467128317, "learning_rate": 9.418159855275765e-05, "loss": 0.8396, "step": 151370 }, { "epoch": 2.657701153461262, "grad_norm": 0.05470432833496833, "learning_rate": 9.417048142440738e-05, "loss": 0.8431, "step": 151380 }, { "epoch": 2.657876718341263, "grad_norm": 0.07262935633043825, "learning_rate": 9.415936438258845e-05, "loss": 0.8432, "step": 151390 }, { "epoch": 2.6580522832212643, "grad_norm": 0.043635201799264986, "learning_rate": 9.41482474274417e-05, "loss": 0.8497, "step": 151400 }, { "epoch": 2.6582278481012658, "grad_norm": 0.06110140088084929, "learning_rate": 9.413713055910794e-05, "loss": 0.8323, "step": 151410 }, { "epoch": 2.6584034129812673, "grad_norm": 0.04326757093664401, "learning_rate": 9.412601377772808e-05, "loss": 0.8419, "step": 151420 }, { "epoch": 2.6585789778612687, "grad_norm": 0.060329512671560934, "learning_rate": 9.411489708344294e-05, "loss": 0.841, "step": 151430 }, { "epoch": 2.6587545427412698, "grad_norm": 0.07697380559197292, "learning_rate": 9.410378047639337e-05, "loss": 0.8399, "step": 151440 }, { "epoch": 2.6589301076212717, "grad_norm": 0.04562904078584801, "learning_rate": 9.409266395672019e-05, "loss": 0.8437, "step": 151450 }, { "epoch": 2.6591056725012727, "grad_norm": 0.059182396275992324, "learning_rate": 9.408154752456431e-05, "loss": 0.8433, "step": 151460 }, { "epoch": 2.659281237381274, "grad_norm": 0.06586524458301399, "learning_rate": 9.407043118006657e-05, "loss": 0.8468, "step": 151470 }, { "epoch": 2.6594568022612757, "grad_norm": 0.05802966359183291, "learning_rate": 9.405931492336774e-05, "loss": 0.8421, "step": 151480 }, { "epoch": 2.659632367141277, "grad_norm": 0.053052986961727375, "learning_rate": 9.404819875460876e-05, "loss": 0.8426, "step": 151490 }, { "epoch": 2.6598079320212786, "grad_norm": 0.0674717105196779, "learning_rate": 9.403708267393043e-05, "loss": 0.8467, "step": 151500 }, { "epoch": 2.6599834969012797, "grad_norm": 0.06253556499112238, "learning_rate": 9.402596668147359e-05, "loss": 0.8413, "step": 151510 }, { "epoch": 2.660159061781281, "grad_norm": 0.06045174406338209, "learning_rate": 9.401485077737909e-05, "loss": 0.8448, "step": 151520 }, { "epoch": 2.6603346266612826, "grad_norm": 0.07009732419609538, "learning_rate": 9.400373496178779e-05, "loss": 0.8437, "step": 151530 }, { "epoch": 2.660510191541284, "grad_norm": 0.06112488252201541, "learning_rate": 9.399261923484048e-05, "loss": 0.8439, "step": 151540 }, { "epoch": 2.6606857564212856, "grad_norm": 0.053243130790166356, "learning_rate": 9.398150359667801e-05, "loss": 0.8492, "step": 151550 }, { "epoch": 2.660861321301287, "grad_norm": 0.06897967455432359, "learning_rate": 9.397038804744126e-05, "loss": 0.8436, "step": 151560 }, { "epoch": 2.6610368861812885, "grad_norm": 0.07174911565265259, "learning_rate": 9.395927258727105e-05, "loss": 0.8442, "step": 151570 }, { "epoch": 2.6612124510612896, "grad_norm": 0.058736910478789525, "learning_rate": 9.394815721630818e-05, "loss": 0.8359, "step": 151580 }, { "epoch": 2.661388015941291, "grad_norm": 0.05316290620424622, "learning_rate": 9.393704193469354e-05, "loss": 0.8525, "step": 151590 }, { "epoch": 2.6615635808212925, "grad_norm": 0.04695926389173282, "learning_rate": 9.392592674256793e-05, "loss": 0.8462, "step": 151600 }, { "epoch": 2.661739145701294, "grad_norm": 0.04239226957676117, "learning_rate": 9.391481164007218e-05, "loss": 0.8372, "step": 151610 }, { "epoch": 2.6619147105812955, "grad_norm": 0.08845960197999383, "learning_rate": 9.390369662734709e-05, "loss": 0.843, "step": 151620 }, { "epoch": 2.6620902754612965, "grad_norm": 0.05008142472934135, "learning_rate": 9.389258170453357e-05, "loss": 0.8434, "step": 151630 }, { "epoch": 2.662265840341298, "grad_norm": 0.0546049519020161, "learning_rate": 9.38814668717724e-05, "loss": 0.8461, "step": 151640 }, { "epoch": 2.6624414052212995, "grad_norm": 0.06679228146694062, "learning_rate": 9.387035212920436e-05, "loss": 0.8451, "step": 151650 }, { "epoch": 2.662616970101301, "grad_norm": 0.059704940822562245, "learning_rate": 9.385923747697035e-05, "loss": 0.8479, "step": 151660 }, { "epoch": 2.6627925349813024, "grad_norm": 0.048580153425783174, "learning_rate": 9.384812291521119e-05, "loss": 0.8333, "step": 151670 }, { "epoch": 2.662968099861304, "grad_norm": 0.04601129438607629, "learning_rate": 9.383700844406769e-05, "loss": 0.851, "step": 151680 }, { "epoch": 2.6631436647413054, "grad_norm": 0.05491009124172223, "learning_rate": 9.382589406368062e-05, "loss": 0.8489, "step": 151690 }, { "epoch": 2.6633192296213064, "grad_norm": 0.05515910480758032, "learning_rate": 9.381477977419089e-05, "loss": 0.849, "step": 151700 }, { "epoch": 2.663494794501308, "grad_norm": 0.051289890581998336, "learning_rate": 9.380366557573927e-05, "loss": 0.84, "step": 151710 }, { "epoch": 2.6636703593813094, "grad_norm": 0.05430308707308694, "learning_rate": 9.379255146846654e-05, "loss": 0.8542, "step": 151720 }, { "epoch": 2.663845924261311, "grad_norm": 0.07004799457353754, "learning_rate": 9.37814374525136e-05, "loss": 0.8441, "step": 151730 }, { "epoch": 2.6640214891413123, "grad_norm": 0.05440321153917079, "learning_rate": 9.377032352802125e-05, "loss": 0.843, "step": 151740 }, { "epoch": 2.6641970540213133, "grad_norm": 0.055904892436044344, "learning_rate": 9.375920969513025e-05, "loss": 0.8403, "step": 151750 }, { "epoch": 2.664372618901315, "grad_norm": 0.05904186190022445, "learning_rate": 9.374809595398148e-05, "loss": 0.8488, "step": 151760 }, { "epoch": 2.6645481837813163, "grad_norm": 0.06199559719261382, "learning_rate": 9.373698230471572e-05, "loss": 0.8463, "step": 151770 }, { "epoch": 2.664723748661318, "grad_norm": 0.053340251714137386, "learning_rate": 9.372586874747378e-05, "loss": 0.8386, "step": 151780 }, { "epoch": 2.6648993135413193, "grad_norm": 0.0775580153289824, "learning_rate": 9.371475528239644e-05, "loss": 0.8444, "step": 151790 }, { "epoch": 2.6650748784213207, "grad_norm": 0.07194891174385153, "learning_rate": 9.370364190962461e-05, "loss": 0.8394, "step": 151800 }, { "epoch": 2.665250443301322, "grad_norm": 0.06135079128105748, "learning_rate": 9.369252862929902e-05, "loss": 0.8438, "step": 151810 }, { "epoch": 2.6654260081813232, "grad_norm": 0.04790855297114918, "learning_rate": 9.368141544156045e-05, "loss": 0.8416, "step": 151820 }, { "epoch": 2.6656015730613247, "grad_norm": 0.04914342030497403, "learning_rate": 9.367030234654979e-05, "loss": 0.8369, "step": 151830 }, { "epoch": 2.665777137941326, "grad_norm": 0.060655517535139404, "learning_rate": 9.365918934440781e-05, "loss": 0.8424, "step": 151840 }, { "epoch": 2.6659527028213277, "grad_norm": 0.046011456212257135, "learning_rate": 9.364807643527529e-05, "loss": 0.8423, "step": 151850 }, { "epoch": 2.666128267701329, "grad_norm": 0.05518479107555286, "learning_rate": 9.363696361929304e-05, "loss": 0.8435, "step": 151860 }, { "epoch": 2.66630383258133, "grad_norm": 0.05790758054613443, "learning_rate": 9.36258508966019e-05, "loss": 0.8375, "step": 151870 }, { "epoch": 2.666479397461332, "grad_norm": 0.054673538196099145, "learning_rate": 9.361473826734263e-05, "loss": 0.8468, "step": 151880 }, { "epoch": 2.666654962341333, "grad_norm": 0.06077453022078411, "learning_rate": 9.3603625731656e-05, "loss": 0.8491, "step": 151890 }, { "epoch": 2.6668305272213346, "grad_norm": 0.05617187864035669, "learning_rate": 9.359251328968289e-05, "loss": 0.8405, "step": 151900 }, { "epoch": 2.667006092101336, "grad_norm": 0.07229341166488415, "learning_rate": 9.358140094156407e-05, "loss": 0.8431, "step": 151910 }, { "epoch": 2.6671816569813376, "grad_norm": 0.0750717279055238, "learning_rate": 9.357028868744029e-05, "loss": 0.8451, "step": 151920 }, { "epoch": 2.667357221861339, "grad_norm": 0.06670722389305597, "learning_rate": 9.355917652745241e-05, "loss": 0.843, "step": 151930 }, { "epoch": 2.66753278674134, "grad_norm": 0.059290926293185116, "learning_rate": 9.354806446174117e-05, "loss": 0.8431, "step": 151940 }, { "epoch": 2.6677083516213416, "grad_norm": 0.058769299962455626, "learning_rate": 9.353695249044739e-05, "loss": 0.8424, "step": 151950 }, { "epoch": 2.667883916501343, "grad_norm": 0.04637076245513428, "learning_rate": 9.352584061371181e-05, "loss": 0.8425, "step": 151960 }, { "epoch": 2.6680594813813445, "grad_norm": 0.07310013270795306, "learning_rate": 9.351472883167534e-05, "loss": 0.8374, "step": 151970 }, { "epoch": 2.668235046261346, "grad_norm": 0.05701322261005276, "learning_rate": 9.350361714447864e-05, "loss": 0.851, "step": 151980 }, { "epoch": 2.668410611141347, "grad_norm": 0.08405645229184856, "learning_rate": 9.349250555226252e-05, "loss": 0.8452, "step": 151990 }, { "epoch": 2.668586176021349, "grad_norm": 0.047082007582514865, "learning_rate": 9.348139405516784e-05, "loss": 0.8417, "step": 152000 }, { "epoch": 2.66876174090135, "grad_norm": 0.06321431018641786, "learning_rate": 9.347028265333533e-05, "loss": 0.8387, "step": 152010 }, { "epoch": 2.6689373057813515, "grad_norm": 0.040195656497357464, "learning_rate": 9.34591713469058e-05, "loss": 0.8472, "step": 152020 }, { "epoch": 2.669112870661353, "grad_norm": 0.04249472151596116, "learning_rate": 9.344806013601996e-05, "loss": 0.8525, "step": 152030 }, { "epoch": 2.6692884355413544, "grad_norm": 0.06487958946452857, "learning_rate": 9.34369490208187e-05, "loss": 0.8427, "step": 152040 }, { "epoch": 2.669464000421356, "grad_norm": 0.06126634624242237, "learning_rate": 9.342583800144273e-05, "loss": 0.8496, "step": 152050 }, { "epoch": 2.669639565301357, "grad_norm": 0.0679857249555314, "learning_rate": 9.34147270780328e-05, "loss": 0.8409, "step": 152060 }, { "epoch": 2.6698151301813584, "grad_norm": 0.05596262746697922, "learning_rate": 9.340361625072978e-05, "loss": 0.8394, "step": 152070 }, { "epoch": 2.66999069506136, "grad_norm": 0.05510650338451066, "learning_rate": 9.33925055196744e-05, "loss": 0.8445, "step": 152080 }, { "epoch": 2.6701662599413614, "grad_norm": 0.046744292785754205, "learning_rate": 9.33813948850074e-05, "loss": 0.8406, "step": 152090 }, { "epoch": 2.670341824821363, "grad_norm": 0.06375653077670541, "learning_rate": 9.337028434686961e-05, "loss": 0.8518, "step": 152100 }, { "epoch": 2.670517389701364, "grad_norm": 0.07576922465788263, "learning_rate": 9.33591739054018e-05, "loss": 0.8388, "step": 152110 }, { "epoch": 2.670692954581366, "grad_norm": 0.06424313931645602, "learning_rate": 9.334806356074471e-05, "loss": 0.8491, "step": 152120 }, { "epoch": 2.670868519461367, "grad_norm": 0.049950357614262995, "learning_rate": 9.333695331303907e-05, "loss": 0.8419, "step": 152130 }, { "epoch": 2.6710440843413683, "grad_norm": 0.045742338660962355, "learning_rate": 9.332584316242576e-05, "loss": 0.8424, "step": 152140 }, { "epoch": 2.67121964922137, "grad_norm": 0.05841628490872988, "learning_rate": 9.331473310904548e-05, "loss": 0.8515, "step": 152150 }, { "epoch": 2.6713952141013713, "grad_norm": 0.05566140485016339, "learning_rate": 9.330362315303896e-05, "loss": 0.8454, "step": 152160 }, { "epoch": 2.6715707789813727, "grad_norm": 0.04597940736744022, "learning_rate": 9.329251329454704e-05, "loss": 0.8408, "step": 152170 }, { "epoch": 2.6717463438613738, "grad_norm": 0.053627364603004254, "learning_rate": 9.328140353371047e-05, "loss": 0.844, "step": 152180 }, { "epoch": 2.6719219087413753, "grad_norm": 0.04590096516072428, "learning_rate": 9.327029387066999e-05, "loss": 0.8469, "step": 152190 }, { "epoch": 2.6720974736213767, "grad_norm": 0.05582242311212083, "learning_rate": 9.325918430556634e-05, "loss": 0.8444, "step": 152200 }, { "epoch": 2.672273038501378, "grad_norm": 0.054299658120264514, "learning_rate": 9.324807483854034e-05, "loss": 0.8484, "step": 152210 }, { "epoch": 2.6724486033813797, "grad_norm": 0.058437884344375864, "learning_rate": 9.323696546973271e-05, "loss": 0.8483, "step": 152220 }, { "epoch": 2.6726241682613807, "grad_norm": 0.07101100283848001, "learning_rate": 9.322585619928418e-05, "loss": 0.8434, "step": 152230 }, { "epoch": 2.6727997331413826, "grad_norm": 0.05738182968370937, "learning_rate": 9.321474702733559e-05, "loss": 0.8389, "step": 152240 }, { "epoch": 2.6729752980213837, "grad_norm": 0.045923186677970296, "learning_rate": 9.320363795402763e-05, "loss": 0.8495, "step": 152250 }, { "epoch": 2.673150862901385, "grad_norm": 0.06105469594235696, "learning_rate": 9.319252897950104e-05, "loss": 0.8379, "step": 152260 }, { "epoch": 2.6733264277813866, "grad_norm": 0.05680237358663035, "learning_rate": 9.318142010389666e-05, "loss": 0.8407, "step": 152270 }, { "epoch": 2.673501992661388, "grad_norm": 0.08997906213932236, "learning_rate": 9.317031132735515e-05, "loss": 0.8449, "step": 152280 }, { "epoch": 2.6736775575413896, "grad_norm": 0.049770088237063526, "learning_rate": 9.315920265001733e-05, "loss": 0.8311, "step": 152290 }, { "epoch": 2.6738531224213906, "grad_norm": 0.0770280048078519, "learning_rate": 9.314809407202385e-05, "loss": 0.8435, "step": 152300 }, { "epoch": 2.674028687301392, "grad_norm": 0.04722590106936225, "learning_rate": 9.313698559351558e-05, "loss": 0.8395, "step": 152310 }, { "epoch": 2.6742042521813936, "grad_norm": 0.06355255071593646, "learning_rate": 9.312587721463322e-05, "loss": 0.8383, "step": 152320 }, { "epoch": 2.674379817061395, "grad_norm": 0.05112094531701857, "learning_rate": 9.311476893551744e-05, "loss": 0.84, "step": 152330 }, { "epoch": 2.6745553819413965, "grad_norm": 0.07340276187826353, "learning_rate": 9.31036607563091e-05, "loss": 0.8453, "step": 152340 }, { "epoch": 2.674730946821398, "grad_norm": 0.0525384378728642, "learning_rate": 9.309255267714887e-05, "loss": 0.8499, "step": 152350 }, { "epoch": 2.6749065117013995, "grad_norm": 0.05629619751554896, "learning_rate": 9.308144469817755e-05, "loss": 0.8416, "step": 152360 }, { "epoch": 2.6750820765814005, "grad_norm": 0.05136062711568219, "learning_rate": 9.307033681953579e-05, "loss": 0.8465, "step": 152370 }, { "epoch": 2.675257641461402, "grad_norm": 0.04589431471619701, "learning_rate": 9.305922904136442e-05, "loss": 0.8521, "step": 152380 }, { "epoch": 2.6754332063414035, "grad_norm": 0.08167448146657148, "learning_rate": 9.304812136380415e-05, "loss": 0.8408, "step": 152390 }, { "epoch": 2.675608771221405, "grad_norm": 0.058154736362337346, "learning_rate": 9.303701378699564e-05, "loss": 0.8422, "step": 152400 }, { "epoch": 2.6757843361014064, "grad_norm": 0.0577268815333584, "learning_rate": 9.302590631107973e-05, "loss": 0.8434, "step": 152410 }, { "epoch": 2.6759599009814075, "grad_norm": 0.07425919604586005, "learning_rate": 9.301479893619713e-05, "loss": 0.8495, "step": 152420 }, { "epoch": 2.676135465861409, "grad_norm": 0.07776117096970407, "learning_rate": 9.300369166248854e-05, "loss": 0.8431, "step": 152430 }, { "epoch": 2.6763110307414104, "grad_norm": 0.0615278036862135, "learning_rate": 9.299258449009473e-05, "loss": 0.8449, "step": 152440 }, { "epoch": 2.676486595621412, "grad_norm": 0.06394509945224608, "learning_rate": 9.298147741915641e-05, "loss": 0.8469, "step": 152450 }, { "epoch": 2.6766621605014134, "grad_norm": 0.08245693775963499, "learning_rate": 9.29703704498143e-05, "loss": 0.8429, "step": 152460 }, { "epoch": 2.676837725381415, "grad_norm": 0.054530249223794916, "learning_rate": 9.29592635822091e-05, "loss": 0.8358, "step": 152470 }, { "epoch": 2.6770132902614163, "grad_norm": 0.05399329890527645, "learning_rate": 9.294815681648163e-05, "loss": 0.8423, "step": 152480 }, { "epoch": 2.6771888551414174, "grad_norm": 0.044577209272264995, "learning_rate": 9.293705015277251e-05, "loss": 0.8401, "step": 152490 }, { "epoch": 2.677364420021419, "grad_norm": 0.05659341228061812, "learning_rate": 9.292594359122251e-05, "loss": 0.8377, "step": 152500 }, { "epoch": 2.6775399849014203, "grad_norm": 0.10072920592133022, "learning_rate": 9.291483713197237e-05, "loss": 0.8464, "step": 152510 }, { "epoch": 2.677715549781422, "grad_norm": 0.0588887859597484, "learning_rate": 9.29037307751628e-05, "loss": 0.8465, "step": 152520 }, { "epoch": 2.6778911146614233, "grad_norm": 0.07134665574745754, "learning_rate": 9.289262452093451e-05, "loss": 0.8477, "step": 152530 }, { "epoch": 2.6780666795414243, "grad_norm": 0.04715900935713044, "learning_rate": 9.288151836942821e-05, "loss": 0.8354, "step": 152540 }, { "epoch": 2.6782422444214258, "grad_norm": 0.06731306263124895, "learning_rate": 9.287041232078465e-05, "loss": 0.8522, "step": 152550 }, { "epoch": 2.6784178093014273, "grad_norm": 0.04837418460872785, "learning_rate": 9.28593063751445e-05, "loss": 0.8431, "step": 152560 }, { "epoch": 2.6785933741814287, "grad_norm": 0.07649811518163255, "learning_rate": 9.284820053264847e-05, "loss": 0.8455, "step": 152570 }, { "epoch": 2.67876893906143, "grad_norm": 0.052845130564030285, "learning_rate": 9.283709479343735e-05, "loss": 0.8413, "step": 152580 }, { "epoch": 2.6789445039414317, "grad_norm": 0.05721379556546547, "learning_rate": 9.28259891576518e-05, "loss": 0.8412, "step": 152590 }, { "epoch": 2.679120068821433, "grad_norm": 0.05338628692536719, "learning_rate": 9.281488362543252e-05, "loss": 0.8471, "step": 152600 }, { "epoch": 2.679295633701434, "grad_norm": 0.048449545287269136, "learning_rate": 9.280377819692025e-05, "loss": 0.8436, "step": 152610 }, { "epoch": 2.6794711985814357, "grad_norm": 0.06101197794163572, "learning_rate": 9.279267287225566e-05, "loss": 0.8449, "step": 152620 }, { "epoch": 2.679646763461437, "grad_norm": 0.049279091553564115, "learning_rate": 9.27815676515795e-05, "loss": 0.8446, "step": 152630 }, { "epoch": 2.6798223283414386, "grad_norm": 0.04948952130134291, "learning_rate": 9.277046253503242e-05, "loss": 0.8443, "step": 152640 }, { "epoch": 2.67999789322144, "grad_norm": 0.0571028413126792, "learning_rate": 9.27593575227552e-05, "loss": 0.8334, "step": 152650 }, { "epoch": 2.680173458101441, "grad_norm": 0.054860868213983234, "learning_rate": 9.274825261488848e-05, "loss": 0.8439, "step": 152660 }, { "epoch": 2.680349022981443, "grad_norm": 0.058544555806549714, "learning_rate": 9.273714781157295e-05, "loss": 0.8461, "step": 152670 }, { "epoch": 2.680524587861444, "grad_norm": 0.04527271464649203, "learning_rate": 9.272604311294937e-05, "loss": 0.8475, "step": 152680 }, { "epoch": 2.6807001527414456, "grad_norm": 0.04802682328970565, "learning_rate": 9.271493851915842e-05, "loss": 0.8498, "step": 152690 }, { "epoch": 2.680875717621447, "grad_norm": 0.04704125637769525, "learning_rate": 9.270383403034079e-05, "loss": 0.8522, "step": 152700 }, { "epoch": 2.6810512825014485, "grad_norm": 0.05214428265206426, "learning_rate": 9.269272964663714e-05, "loss": 0.8365, "step": 152710 }, { "epoch": 2.68122684738145, "grad_norm": 0.063973721938654, "learning_rate": 9.268162536818821e-05, "loss": 0.8453, "step": 152720 }, { "epoch": 2.681402412261451, "grad_norm": 0.062142365247881734, "learning_rate": 9.267052119513469e-05, "loss": 0.8466, "step": 152730 }, { "epoch": 2.6815779771414525, "grad_norm": 0.06605197930079247, "learning_rate": 9.265941712761723e-05, "loss": 0.8446, "step": 152740 }, { "epoch": 2.681753542021454, "grad_norm": 0.1240452914172638, "learning_rate": 9.26483131657766e-05, "loss": 0.843, "step": 152750 }, { "epoch": 2.6819291069014555, "grad_norm": 0.0476867999128541, "learning_rate": 9.263720930975342e-05, "loss": 0.8395, "step": 152760 }, { "epoch": 2.682104671781457, "grad_norm": 0.05701071207977274, "learning_rate": 9.262610555968841e-05, "loss": 0.8386, "step": 152770 }, { "epoch": 2.682280236661458, "grad_norm": 0.05165825994183015, "learning_rate": 9.261500191572224e-05, "loss": 0.8479, "step": 152780 }, { "epoch": 2.68245580154146, "grad_norm": 0.058579349653477185, "learning_rate": 9.26038983779956e-05, "loss": 0.8468, "step": 152790 }, { "epoch": 2.682631366421461, "grad_norm": 0.05513710021594284, "learning_rate": 9.25927949466492e-05, "loss": 0.8461, "step": 152800 }, { "epoch": 2.6828069313014624, "grad_norm": 0.05072131288114822, "learning_rate": 9.258169162182364e-05, "loss": 0.8391, "step": 152810 }, { "epoch": 2.682982496181464, "grad_norm": 0.054049144612205204, "learning_rate": 9.257058840365973e-05, "loss": 0.8323, "step": 152820 }, { "epoch": 2.6831580610614654, "grad_norm": 0.04978363878546913, "learning_rate": 9.255948529229807e-05, "loss": 0.8428, "step": 152830 }, { "epoch": 2.683333625941467, "grad_norm": 0.05273420883704863, "learning_rate": 9.254838228787929e-05, "loss": 0.8385, "step": 152840 }, { "epoch": 2.683509190821468, "grad_norm": 0.052685064740719184, "learning_rate": 9.253727939054419e-05, "loss": 0.85, "step": 152850 }, { "epoch": 2.6836847557014694, "grad_norm": 0.05680890757034855, "learning_rate": 9.252617660043336e-05, "loss": 0.8451, "step": 152860 }, { "epoch": 2.683860320581471, "grad_norm": 0.05575378143869414, "learning_rate": 9.25150739176875e-05, "loss": 0.8394, "step": 152870 }, { "epoch": 2.6840358854614723, "grad_norm": 0.06162964861425104, "learning_rate": 9.250397134244726e-05, "loss": 0.8488, "step": 152880 }, { "epoch": 2.684211450341474, "grad_norm": 0.04986582545277134, "learning_rate": 9.249286887485335e-05, "loss": 0.851, "step": 152890 }, { "epoch": 2.684387015221475, "grad_norm": 0.036661447678279696, "learning_rate": 9.248176651504643e-05, "loss": 0.8373, "step": 152900 }, { "epoch": 2.6845625801014767, "grad_norm": 0.051883593238355834, "learning_rate": 9.247066426316712e-05, "loss": 0.8481, "step": 152910 }, { "epoch": 2.684738144981478, "grad_norm": 0.05706996063708991, "learning_rate": 9.245956211935617e-05, "loss": 0.8455, "step": 152920 }, { "epoch": 2.6849137098614793, "grad_norm": 0.052864794094417945, "learning_rate": 9.24484600837542e-05, "loss": 0.8514, "step": 152930 }, { "epoch": 2.6850892747414807, "grad_norm": 0.09781838510003275, "learning_rate": 9.243735815650186e-05, "loss": 0.8422, "step": 152940 }, { "epoch": 2.685264839621482, "grad_norm": 0.063510635387078, "learning_rate": 9.242625633773985e-05, "loss": 0.8418, "step": 152950 }, { "epoch": 2.6854404045014837, "grad_norm": 0.057780057768587495, "learning_rate": 9.241515462760881e-05, "loss": 0.8523, "step": 152960 }, { "epoch": 2.6856159693814847, "grad_norm": 0.06530525194478863, "learning_rate": 9.240405302624941e-05, "loss": 0.8328, "step": 152970 }, { "epoch": 2.685791534261486, "grad_norm": 0.05464484202693666, "learning_rate": 9.239295153380228e-05, "loss": 0.8437, "step": 152980 }, { "epoch": 2.6859670991414877, "grad_norm": 0.04909995707885833, "learning_rate": 9.238185015040814e-05, "loss": 0.8426, "step": 152990 }, { "epoch": 2.686142664021489, "grad_norm": 0.047379316586592356, "learning_rate": 9.237074887620761e-05, "loss": 0.8469, "step": 153000 }, { "epoch": 2.6863182289014906, "grad_norm": 0.06670203320123567, "learning_rate": 9.23596477113413e-05, "loss": 0.8397, "step": 153010 }, { "epoch": 2.686493793781492, "grad_norm": 0.059993525853792314, "learning_rate": 9.234854665594994e-05, "loss": 0.8358, "step": 153020 }, { "epoch": 2.6866693586614936, "grad_norm": 0.060422412492559904, "learning_rate": 9.233744571017418e-05, "loss": 0.8414, "step": 153030 }, { "epoch": 2.6868449235414946, "grad_norm": 0.06027954086393374, "learning_rate": 9.232634487415461e-05, "loss": 0.8421, "step": 153040 }, { "epoch": 2.687020488421496, "grad_norm": 0.06557348287116291, "learning_rate": 9.231524414803191e-05, "loss": 0.8435, "step": 153050 }, { "epoch": 2.6871960533014976, "grad_norm": 0.057888168681653814, "learning_rate": 9.230414353194675e-05, "loss": 0.8461, "step": 153060 }, { "epoch": 2.687371618181499, "grad_norm": 0.05171648842308461, "learning_rate": 9.229304302603975e-05, "loss": 0.8455, "step": 153070 }, { "epoch": 2.6875471830615005, "grad_norm": 0.05724761196840583, "learning_rate": 9.228194263045153e-05, "loss": 0.8417, "step": 153080 }, { "epoch": 2.6877227479415016, "grad_norm": 0.05442034676275662, "learning_rate": 9.22708423453228e-05, "loss": 0.8439, "step": 153090 }, { "epoch": 2.687898312821503, "grad_norm": 0.05721631493794233, "learning_rate": 9.225974217079418e-05, "loss": 0.8511, "step": 153100 }, { "epoch": 2.6880738777015045, "grad_norm": 0.04872197576270406, "learning_rate": 9.22486421070063e-05, "loss": 0.8492, "step": 153110 }, { "epoch": 2.688249442581506, "grad_norm": 0.09018986461301493, "learning_rate": 9.223754215409978e-05, "loss": 0.8423, "step": 153120 }, { "epoch": 2.6884250074615075, "grad_norm": 0.053292042048418246, "learning_rate": 9.22264423122153e-05, "loss": 0.839, "step": 153130 }, { "epoch": 2.688600572341509, "grad_norm": 0.05093580223636449, "learning_rate": 9.221534258149348e-05, "loss": 0.8379, "step": 153140 }, { "epoch": 2.6887761372215104, "grad_norm": 0.0618175551769233, "learning_rate": 9.220424296207491e-05, "loss": 0.8404, "step": 153150 }, { "epoch": 2.6889517021015115, "grad_norm": 0.06750134360588075, "learning_rate": 9.219314345410033e-05, "loss": 0.8516, "step": 153160 }, { "epoch": 2.689127266981513, "grad_norm": 0.05308286021982019, "learning_rate": 9.21820440577103e-05, "loss": 0.8513, "step": 153170 }, { "epoch": 2.6893028318615144, "grad_norm": 0.05170722186165824, "learning_rate": 9.21709447730454e-05, "loss": 0.8459, "step": 153180 }, { "epoch": 2.689478396741516, "grad_norm": 0.05559983936481459, "learning_rate": 9.215984560024637e-05, "loss": 0.8442, "step": 153190 }, { "epoch": 2.6896539616215174, "grad_norm": 0.05506347681803979, "learning_rate": 9.214874653945379e-05, "loss": 0.8468, "step": 153200 }, { "epoch": 2.6898295265015184, "grad_norm": 0.06456457858920216, "learning_rate": 9.213764759080829e-05, "loss": 0.8459, "step": 153210 }, { "epoch": 2.69000509138152, "grad_norm": 0.06547391528723716, "learning_rate": 9.212654875445047e-05, "loss": 0.8418, "step": 153220 }, { "epoch": 2.6901806562615214, "grad_norm": 0.06254885356101456, "learning_rate": 9.211545003052101e-05, "loss": 0.8437, "step": 153230 }, { "epoch": 2.690356221141523, "grad_norm": 0.05929870777963771, "learning_rate": 9.210435141916049e-05, "loss": 0.8445, "step": 153240 }, { "epoch": 2.6905317860215243, "grad_norm": 0.05911177851363282, "learning_rate": 9.20932529205095e-05, "loss": 0.8476, "step": 153250 }, { "epoch": 2.690707350901526, "grad_norm": 0.05018336492926591, "learning_rate": 9.208215453470875e-05, "loss": 0.8483, "step": 153260 }, { "epoch": 2.6908829157815273, "grad_norm": 0.06641238102218287, "learning_rate": 9.207105626189882e-05, "loss": 0.8412, "step": 153270 }, { "epoch": 2.6910584806615283, "grad_norm": 0.051386439055780976, "learning_rate": 9.20599581022203e-05, "loss": 0.8446, "step": 153280 }, { "epoch": 2.69123404554153, "grad_norm": 0.04711846172737254, "learning_rate": 9.204886005581381e-05, "loss": 0.8362, "step": 153290 }, { "epoch": 2.6914096104215313, "grad_norm": 0.06187678090730837, "learning_rate": 9.203776212282e-05, "loss": 0.8462, "step": 153300 }, { "epoch": 2.6915851753015327, "grad_norm": 0.04873260961512266, "learning_rate": 9.202666430337947e-05, "loss": 0.8467, "step": 153310 }, { "epoch": 2.691760740181534, "grad_norm": 0.10651066365578471, "learning_rate": 9.201556659763276e-05, "loss": 0.8439, "step": 153320 }, { "epoch": 2.6919363050615353, "grad_norm": 0.06242569013089358, "learning_rate": 9.200446900572062e-05, "loss": 0.8394, "step": 153330 }, { "epoch": 2.692111869941537, "grad_norm": 0.05534419357814975, "learning_rate": 9.199337152778357e-05, "loss": 0.8447, "step": 153340 }, { "epoch": 2.692287434821538, "grad_norm": 0.06538293376870602, "learning_rate": 9.198227416396216e-05, "loss": 0.8462, "step": 153350 }, { "epoch": 2.6924629997015397, "grad_norm": 0.0530169205770271, "learning_rate": 9.197117691439713e-05, "loss": 0.8431, "step": 153360 }, { "epoch": 2.692638564581541, "grad_norm": 0.045660905166294175, "learning_rate": 9.196007977922902e-05, "loss": 0.8432, "step": 153370 }, { "epoch": 2.6928141294615426, "grad_norm": 0.052563008604315724, "learning_rate": 9.194898275859841e-05, "loss": 0.842, "step": 153380 }, { "epoch": 2.692989694341544, "grad_norm": 0.05196975902888274, "learning_rate": 9.193788585264591e-05, "loss": 0.8412, "step": 153390 }, { "epoch": 2.693165259221545, "grad_norm": 0.07110607277942092, "learning_rate": 9.192678906151216e-05, "loss": 0.844, "step": 153400 }, { "epoch": 2.6933408241015466, "grad_norm": 0.05035163265839753, "learning_rate": 9.191569238533773e-05, "loss": 0.8377, "step": 153410 }, { "epoch": 2.693516388981548, "grad_norm": 0.06549385743011141, "learning_rate": 9.190459582426318e-05, "loss": 0.8429, "step": 153420 }, { "epoch": 2.6936919538615496, "grad_norm": 0.06503531568155219, "learning_rate": 9.189349937842919e-05, "loss": 0.8517, "step": 153430 }, { "epoch": 2.693867518741551, "grad_norm": 0.05598572994059235, "learning_rate": 9.18824030479763e-05, "loss": 0.8329, "step": 153440 }, { "epoch": 2.694043083621552, "grad_norm": 0.0514939793722406, "learning_rate": 9.187130683304511e-05, "loss": 0.8436, "step": 153450 }, { "epoch": 2.694218648501554, "grad_norm": 0.053348599832058384, "learning_rate": 9.186021073377619e-05, "loss": 0.8374, "step": 153460 }, { "epoch": 2.694394213381555, "grad_norm": 0.0529937935921469, "learning_rate": 9.184911475031019e-05, "loss": 0.8492, "step": 153470 }, { "epoch": 2.6945697782615565, "grad_norm": 0.04955633215574851, "learning_rate": 9.183801888278764e-05, "loss": 0.8443, "step": 153480 }, { "epoch": 2.694745343141558, "grad_norm": 0.05057965255667922, "learning_rate": 9.182692313134911e-05, "loss": 0.8396, "step": 153490 }, { "epoch": 2.6949209080215595, "grad_norm": 0.05475695338081478, "learning_rate": 9.181582749613528e-05, "loss": 0.8414, "step": 153500 }, { "epoch": 2.695096472901561, "grad_norm": 0.04393662105109547, "learning_rate": 9.180473197728666e-05, "loss": 0.8397, "step": 153510 }, { "epoch": 2.695272037781562, "grad_norm": 0.049039913766967876, "learning_rate": 9.17936365749438e-05, "loss": 0.8434, "step": 153520 }, { "epoch": 2.6954476026615635, "grad_norm": 0.04907528828096971, "learning_rate": 9.178254128924738e-05, "loss": 0.8341, "step": 153530 }, { "epoch": 2.695623167541565, "grad_norm": 0.053067124460201155, "learning_rate": 9.177144612033794e-05, "loss": 0.842, "step": 153540 }, { "epoch": 2.6957987324215664, "grad_norm": 0.046878226473837645, "learning_rate": 9.176035106835602e-05, "loss": 0.8465, "step": 153550 }, { "epoch": 2.695974297301568, "grad_norm": 0.08530757232935116, "learning_rate": 9.17492561334422e-05, "loss": 0.8449, "step": 153560 }, { "epoch": 2.696149862181569, "grad_norm": 0.06088777119722741, "learning_rate": 9.173816131573713e-05, "loss": 0.8421, "step": 153570 }, { "epoch": 2.696325427061571, "grad_norm": 0.050517659489040856, "learning_rate": 9.172706661538131e-05, "loss": 0.84, "step": 153580 }, { "epoch": 2.696500991941572, "grad_norm": 0.06578177206286466, "learning_rate": 9.17159720325153e-05, "loss": 0.8447, "step": 153590 }, { "epoch": 2.6966765568215734, "grad_norm": 0.06962017313922292, "learning_rate": 9.170487756727974e-05, "loss": 0.8388, "step": 153600 }, { "epoch": 2.696852121701575, "grad_norm": 0.07793316376677074, "learning_rate": 9.169378321981515e-05, "loss": 0.8365, "step": 153610 }, { "epoch": 2.6970276865815763, "grad_norm": 0.06866811149347585, "learning_rate": 9.168268899026212e-05, "loss": 0.8445, "step": 153620 }, { "epoch": 2.697203251461578, "grad_norm": 0.048853286756278994, "learning_rate": 9.16715948787612e-05, "loss": 0.8395, "step": 153630 }, { "epoch": 2.697378816341579, "grad_norm": 0.06996334532792522, "learning_rate": 9.166050088545296e-05, "loss": 0.8413, "step": 153640 }, { "epoch": 2.6975543812215803, "grad_norm": 0.04943857005628, "learning_rate": 9.164940701047798e-05, "loss": 0.8343, "step": 153650 }, { "epoch": 2.697729946101582, "grad_norm": 0.06516058592031486, "learning_rate": 9.163831325397676e-05, "loss": 0.8473, "step": 153660 }, { "epoch": 2.6979055109815833, "grad_norm": 0.05561851446192938, "learning_rate": 9.162721961608997e-05, "loss": 0.8395, "step": 153670 }, { "epoch": 2.6980810758615847, "grad_norm": 0.07455233133567626, "learning_rate": 9.161612609695807e-05, "loss": 0.8339, "step": 153680 }, { "epoch": 2.6982566407415858, "grad_norm": 0.07876281509117587, "learning_rate": 9.160503269672162e-05, "loss": 0.8388, "step": 153690 }, { "epoch": 2.6984322056215877, "grad_norm": 0.07047235925325852, "learning_rate": 9.159393941552123e-05, "loss": 0.836, "step": 153700 }, { "epoch": 2.6986077705015887, "grad_norm": 0.05342458521997134, "learning_rate": 9.158284625349746e-05, "loss": 0.8401, "step": 153710 }, { "epoch": 2.69878333538159, "grad_norm": 0.07257108756036125, "learning_rate": 9.157175321079081e-05, "loss": 0.8528, "step": 153720 }, { "epoch": 2.6989589002615917, "grad_norm": 0.0721117561253353, "learning_rate": 9.156066028754184e-05, "loss": 0.8447, "step": 153730 }, { "epoch": 2.699134465141593, "grad_norm": 0.06233727174805513, "learning_rate": 9.154956748389112e-05, "loss": 0.8331, "step": 153740 }, { "epoch": 2.6993100300215946, "grad_norm": 0.05722367628238072, "learning_rate": 9.153847479997923e-05, "loss": 0.8376, "step": 153750 }, { "epoch": 2.6994855949015957, "grad_norm": 0.06261171647119032, "learning_rate": 9.15273822359466e-05, "loss": 0.847, "step": 153760 }, { "epoch": 2.699661159781597, "grad_norm": 0.0556007451751719, "learning_rate": 9.151628979193391e-05, "loss": 0.8412, "step": 153770 }, { "epoch": 2.6998367246615986, "grad_norm": 0.053886841567105215, "learning_rate": 9.150519746808164e-05, "loss": 0.8418, "step": 153780 }, { "epoch": 2.7000122895416, "grad_norm": 0.05779219394679433, "learning_rate": 9.149410526453034e-05, "loss": 0.8449, "step": 153790 }, { "epoch": 2.7001878544216016, "grad_norm": 0.05190953529280323, "learning_rate": 9.148301318142052e-05, "loss": 0.8456, "step": 153800 }, { "epoch": 2.700363419301603, "grad_norm": 0.04780340208951375, "learning_rate": 9.147192121889277e-05, "loss": 0.8444, "step": 153810 }, { "epoch": 2.7005389841816045, "grad_norm": 0.06215655355941395, "learning_rate": 9.146082937708762e-05, "loss": 0.8454, "step": 153820 }, { "epoch": 2.7007145490616056, "grad_norm": 0.06527664480600504, "learning_rate": 9.144973765614553e-05, "loss": 0.8459, "step": 153830 }, { "epoch": 2.700890113941607, "grad_norm": 0.05796336598298572, "learning_rate": 9.143864605620718e-05, "loss": 0.8417, "step": 153840 }, { "epoch": 2.7010656788216085, "grad_norm": 0.07369426273021604, "learning_rate": 9.142755457741298e-05, "loss": 0.8406, "step": 153850 }, { "epoch": 2.70124124370161, "grad_norm": 0.0471214403386902, "learning_rate": 9.141646321990344e-05, "loss": 0.8385, "step": 153860 }, { "epoch": 2.7014168085816115, "grad_norm": 0.06278374196428196, "learning_rate": 9.14053719838192e-05, "loss": 0.8455, "step": 153870 }, { "epoch": 2.7015923734616125, "grad_norm": 0.05743047792616827, "learning_rate": 9.139428086930076e-05, "loss": 0.8481, "step": 153880 }, { "epoch": 2.701767938341614, "grad_norm": 0.06436924970980892, "learning_rate": 9.13831898764886e-05, "loss": 0.835, "step": 153890 }, { "epoch": 2.7019435032216155, "grad_norm": 0.051876956911328514, "learning_rate": 9.137209900552325e-05, "loss": 0.8415, "step": 153900 }, { "epoch": 2.702119068101617, "grad_norm": 0.058844055478789764, "learning_rate": 9.136100825654526e-05, "loss": 0.8446, "step": 153910 }, { "epoch": 2.7022946329816184, "grad_norm": 0.05207096601907242, "learning_rate": 9.134991762969515e-05, "loss": 0.8422, "step": 153920 }, { "epoch": 2.70247019786162, "grad_norm": 0.044200884685514225, "learning_rate": 9.133882712511341e-05, "loss": 0.8397, "step": 153930 }, { "epoch": 2.7026457627416214, "grad_norm": 0.04316051261683632, "learning_rate": 9.13277367429406e-05, "loss": 0.8487, "step": 153940 }, { "epoch": 2.7028213276216224, "grad_norm": 0.07076930291847372, "learning_rate": 9.131664648331724e-05, "loss": 0.8445, "step": 153950 }, { "epoch": 2.702996892501624, "grad_norm": 0.044837474175757376, "learning_rate": 9.13055563463838e-05, "loss": 0.8436, "step": 153960 }, { "epoch": 2.7031724573816254, "grad_norm": 0.05500627010720617, "learning_rate": 9.129446633228083e-05, "loss": 0.8472, "step": 153970 }, { "epoch": 2.703348022261627, "grad_norm": 0.04603671719646018, "learning_rate": 9.128337644114881e-05, "loss": 0.8512, "step": 153980 }, { "epoch": 2.7035235871416283, "grad_norm": 0.04999783791144092, "learning_rate": 9.12722866731283e-05, "loss": 0.8421, "step": 153990 }, { "epoch": 2.7036991520216294, "grad_norm": 0.0525331583989158, "learning_rate": 9.126119702835975e-05, "loss": 0.8382, "step": 154000 }, { "epoch": 2.703874716901631, "grad_norm": 0.044358415572394096, "learning_rate": 9.125010750698375e-05, "loss": 0.8454, "step": 154010 }, { "epoch": 2.7040502817816323, "grad_norm": 0.06310074062970107, "learning_rate": 9.123901810914073e-05, "loss": 0.8495, "step": 154020 }, { "epoch": 2.704225846661634, "grad_norm": 0.056123534307589096, "learning_rate": 9.122792883497119e-05, "loss": 0.8393, "step": 154030 }, { "epoch": 2.7044014115416353, "grad_norm": 0.05940787139523218, "learning_rate": 9.121683968461569e-05, "loss": 0.8468, "step": 154040 }, { "epoch": 2.7045769764216367, "grad_norm": 0.06157938857338847, "learning_rate": 9.120575065821471e-05, "loss": 0.8435, "step": 154050 }, { "epoch": 2.7047525413016382, "grad_norm": 0.05517299525637102, "learning_rate": 9.119466175590876e-05, "loss": 0.8462, "step": 154060 }, { "epoch": 2.7049281061816393, "grad_norm": 0.06666601081659004, "learning_rate": 9.11835729778383e-05, "loss": 0.8379, "step": 154070 }, { "epoch": 2.7051036710616407, "grad_norm": 0.048283391591432495, "learning_rate": 9.117248432414387e-05, "loss": 0.8406, "step": 154080 }, { "epoch": 2.705279235941642, "grad_norm": 0.05275829574982348, "learning_rate": 9.116139579496595e-05, "loss": 0.8488, "step": 154090 }, { "epoch": 2.7054548008216437, "grad_norm": 0.0455556000816163, "learning_rate": 9.115030739044497e-05, "loss": 0.8545, "step": 154100 }, { "epoch": 2.705630365701645, "grad_norm": 0.05361970074250794, "learning_rate": 9.113921911072155e-05, "loss": 0.8462, "step": 154110 }, { "epoch": 2.705805930581646, "grad_norm": 0.062136363167253454, "learning_rate": 9.11281309559361e-05, "loss": 0.8429, "step": 154120 }, { "epoch": 2.705981495461648, "grad_norm": 0.051722349490947825, "learning_rate": 9.111704292622913e-05, "loss": 0.8332, "step": 154130 }, { "epoch": 2.706157060341649, "grad_norm": 0.05756188599949267, "learning_rate": 9.11059550217411e-05, "loss": 0.8449, "step": 154140 }, { "epoch": 2.7063326252216506, "grad_norm": 0.05775776207357769, "learning_rate": 9.109486724261254e-05, "loss": 0.8443, "step": 154150 }, { "epoch": 2.706508190101652, "grad_norm": 0.07700578540503504, "learning_rate": 9.10837795889839e-05, "loss": 0.8346, "step": 154160 }, { "epoch": 2.7066837549816536, "grad_norm": 0.05221660804497351, "learning_rate": 9.107269206099563e-05, "loss": 0.8392, "step": 154170 }, { "epoch": 2.706859319861655, "grad_norm": 0.04980954410128407, "learning_rate": 9.106160465878832e-05, "loss": 0.8367, "step": 154180 }, { "epoch": 2.707034884741656, "grad_norm": 0.056611677114973495, "learning_rate": 9.105051738250236e-05, "loss": 0.8443, "step": 154190 }, { "epoch": 2.7072104496216576, "grad_norm": 0.05638718061439771, "learning_rate": 9.103943023227822e-05, "loss": 0.8407, "step": 154200 }, { "epoch": 2.707386014501659, "grad_norm": 0.04058570117672139, "learning_rate": 9.102834320825643e-05, "loss": 0.8506, "step": 154210 }, { "epoch": 2.7075615793816605, "grad_norm": 0.054873678756784304, "learning_rate": 9.101725631057747e-05, "loss": 0.837, "step": 154220 }, { "epoch": 2.707737144261662, "grad_norm": 0.05080532146899716, "learning_rate": 9.100616953938176e-05, "loss": 0.8479, "step": 154230 }, { "epoch": 2.707912709141663, "grad_norm": 0.04676975465942692, "learning_rate": 9.09950828948098e-05, "loss": 0.8404, "step": 154240 }, { "epoch": 2.708088274021665, "grad_norm": 0.058767114667575754, "learning_rate": 9.098399637700207e-05, "loss": 0.85, "step": 154250 }, { "epoch": 2.708263838901666, "grad_norm": 0.06564695790522658, "learning_rate": 9.097290998609902e-05, "loss": 0.8439, "step": 154260 }, { "epoch": 2.7084394037816675, "grad_norm": 0.044204615132739, "learning_rate": 9.096182372224108e-05, "loss": 0.8471, "step": 154270 }, { "epoch": 2.708614968661669, "grad_norm": 0.052009669332128555, "learning_rate": 9.09507375855688e-05, "loss": 0.8393, "step": 154280 }, { "epoch": 2.7087905335416704, "grad_norm": 0.07015435611322605, "learning_rate": 9.09396515762226e-05, "loss": 0.8459, "step": 154290 }, { "epoch": 2.708966098421672, "grad_norm": 0.04896178396777164, "learning_rate": 9.092856569434295e-05, "loss": 0.8447, "step": 154300 }, { "epoch": 2.709141663301673, "grad_norm": 0.054790024006316315, "learning_rate": 9.091747994007028e-05, "loss": 0.8509, "step": 154310 }, { "epoch": 2.7093172281816744, "grad_norm": 0.05624928379824036, "learning_rate": 9.090639431354509e-05, "loss": 0.8482, "step": 154320 }, { "epoch": 2.709492793061676, "grad_norm": 0.04987514822911432, "learning_rate": 9.089530881490781e-05, "loss": 0.8381, "step": 154330 }, { "epoch": 2.7096683579416774, "grad_norm": 0.05808821692306947, "learning_rate": 9.088422344429888e-05, "loss": 0.8375, "step": 154340 }, { "epoch": 2.709843922821679, "grad_norm": 0.06316372777602167, "learning_rate": 9.087313820185882e-05, "loss": 0.8444, "step": 154350 }, { "epoch": 2.71001948770168, "grad_norm": 0.04807315940573827, "learning_rate": 9.086205308772803e-05, "loss": 0.8508, "step": 154360 }, { "epoch": 2.710195052581682, "grad_norm": 0.05509055168015543, "learning_rate": 9.085096810204693e-05, "loss": 0.8375, "step": 154370 }, { "epoch": 2.710370617461683, "grad_norm": 0.05441390840489102, "learning_rate": 9.083988324495604e-05, "loss": 0.8341, "step": 154380 }, { "epoch": 2.7105461823416843, "grad_norm": 0.07348029507797692, "learning_rate": 9.082879851659579e-05, "loss": 0.8504, "step": 154390 }, { "epoch": 2.710721747221686, "grad_norm": 0.0532918310212392, "learning_rate": 9.081771391710662e-05, "loss": 0.8469, "step": 154400 }, { "epoch": 2.7108973121016873, "grad_norm": 0.04244320706785365, "learning_rate": 9.080662944662892e-05, "loss": 0.8443, "step": 154410 }, { "epoch": 2.7110728769816888, "grad_norm": 0.07469473152827645, "learning_rate": 9.079554510530321e-05, "loss": 0.8469, "step": 154420 }, { "epoch": 2.71124844186169, "grad_norm": 0.05150949478395944, "learning_rate": 9.07844608932699e-05, "loss": 0.8472, "step": 154430 }, { "epoch": 2.7114240067416913, "grad_norm": 0.07386180806879152, "learning_rate": 9.07733768106694e-05, "loss": 0.8467, "step": 154440 }, { "epoch": 2.7115995716216927, "grad_norm": 0.05055191450667085, "learning_rate": 9.07622928576422e-05, "loss": 0.8474, "step": 154450 }, { "epoch": 2.711775136501694, "grad_norm": 0.046952911724529145, "learning_rate": 9.07512090343287e-05, "loss": 0.8484, "step": 154460 }, { "epoch": 2.7119507013816957, "grad_norm": 0.04894912833141031, "learning_rate": 9.074012534086937e-05, "loss": 0.8351, "step": 154470 }, { "epoch": 2.712126266261697, "grad_norm": 0.057365381454763505, "learning_rate": 9.072904177740459e-05, "loss": 0.8443, "step": 154480 }, { "epoch": 2.7123018311416986, "grad_norm": 0.05471033307262754, "learning_rate": 9.071795834407484e-05, "loss": 0.848, "step": 154490 }, { "epoch": 2.7124773960216997, "grad_norm": 0.05752915237665505, "learning_rate": 9.070687504102053e-05, "loss": 0.8498, "step": 154500 }, { "epoch": 2.712652960901701, "grad_norm": 0.04352731607760854, "learning_rate": 9.069579186838204e-05, "loss": 0.8467, "step": 154510 }, { "epoch": 2.7128285257817026, "grad_norm": 0.04287803718277755, "learning_rate": 9.06847088262999e-05, "loss": 0.8428, "step": 154520 }, { "epoch": 2.713004090661704, "grad_norm": 0.05810708627929148, "learning_rate": 9.067362591491447e-05, "loss": 0.8379, "step": 154530 }, { "epoch": 2.7131796555417056, "grad_norm": 0.05588771275889898, "learning_rate": 9.066254313436613e-05, "loss": 0.843, "step": 154540 }, { "epoch": 2.7133552204217066, "grad_norm": 0.06212812533486526, "learning_rate": 9.065146048479539e-05, "loss": 0.8513, "step": 154550 }, { "epoch": 2.713530785301708, "grad_norm": 0.05174527078240542, "learning_rate": 9.064037796634263e-05, "loss": 0.8391, "step": 154560 }, { "epoch": 2.7137063501817096, "grad_norm": 0.06842215993733804, "learning_rate": 9.062929557914825e-05, "loss": 0.8495, "step": 154570 }, { "epoch": 2.713881915061711, "grad_norm": 0.04609208194836482, "learning_rate": 9.061821332335268e-05, "loss": 0.8478, "step": 154580 }, { "epoch": 2.7140574799417125, "grad_norm": 0.053084821688936586, "learning_rate": 9.060713119909637e-05, "loss": 0.8481, "step": 154590 }, { "epoch": 2.714233044821714, "grad_norm": 0.05086414747581364, "learning_rate": 9.059604920651968e-05, "loss": 0.8405, "step": 154600 }, { "epoch": 2.7144086097017155, "grad_norm": 0.04840270268910114, "learning_rate": 9.0584967345763e-05, "loss": 0.8483, "step": 154610 }, { "epoch": 2.7145841745817165, "grad_norm": 0.06728584570453013, "learning_rate": 9.057388561696681e-05, "loss": 0.8339, "step": 154620 }, { "epoch": 2.714759739461718, "grad_norm": 0.060944344093464005, "learning_rate": 9.056280402027151e-05, "loss": 0.8473, "step": 154630 }, { "epoch": 2.7149353043417195, "grad_norm": 0.07926794592147861, "learning_rate": 9.055172255581747e-05, "loss": 0.8424, "step": 154640 }, { "epoch": 2.715110869221721, "grad_norm": 0.05761421930222924, "learning_rate": 9.054064122374509e-05, "loss": 0.8446, "step": 154650 }, { "epoch": 2.7152864341017224, "grad_norm": 0.06798534591052041, "learning_rate": 9.052956002419482e-05, "loss": 0.843, "step": 154660 }, { "epoch": 2.7154619989817235, "grad_norm": 0.0650564766198692, "learning_rate": 9.051847895730703e-05, "loss": 0.831, "step": 154670 }, { "epoch": 2.715637563861725, "grad_norm": 0.07578954677868643, "learning_rate": 9.050739802322207e-05, "loss": 0.8381, "step": 154680 }, { "epoch": 2.7158131287417264, "grad_norm": 0.08638958128584583, "learning_rate": 9.049631722208045e-05, "loss": 0.8482, "step": 154690 }, { "epoch": 2.715988693621728, "grad_norm": 0.09309691517575748, "learning_rate": 9.048523655402249e-05, "loss": 0.8435, "step": 154700 }, { "epoch": 2.7161642585017294, "grad_norm": 0.05527126490152272, "learning_rate": 9.047415601918855e-05, "loss": 0.8418, "step": 154710 }, { "epoch": 2.716339823381731, "grad_norm": 0.051983791280825455, "learning_rate": 9.046307561771912e-05, "loss": 0.8377, "step": 154720 }, { "epoch": 2.7165153882617323, "grad_norm": 0.05951340077736277, "learning_rate": 9.045199534975453e-05, "loss": 0.8477, "step": 154730 }, { "epoch": 2.7166909531417334, "grad_norm": 0.057038106176347886, "learning_rate": 9.04409152154352e-05, "loss": 0.8399, "step": 154740 }, { "epoch": 2.716866518021735, "grad_norm": 0.05871381566064623, "learning_rate": 9.042983521490144e-05, "loss": 0.8399, "step": 154750 }, { "epoch": 2.7170420829017363, "grad_norm": 0.06928297804635102, "learning_rate": 9.041875534829375e-05, "loss": 0.8425, "step": 154760 }, { "epoch": 2.717217647781738, "grad_norm": 0.05589264409308278, "learning_rate": 9.040767561575243e-05, "loss": 0.84, "step": 154770 }, { "epoch": 2.7173932126617393, "grad_norm": 0.05220679495985041, "learning_rate": 9.039659601741788e-05, "loss": 0.8434, "step": 154780 }, { "epoch": 2.7175687775417403, "grad_norm": 0.04341720738806322, "learning_rate": 9.038551655343052e-05, "loss": 0.8452, "step": 154790 }, { "epoch": 2.7177443424217422, "grad_norm": 0.04685448440804894, "learning_rate": 9.037443722393068e-05, "loss": 0.8488, "step": 154800 }, { "epoch": 2.7179199073017433, "grad_norm": 0.05113302605684361, "learning_rate": 9.036335802905878e-05, "loss": 0.8475, "step": 154810 }, { "epoch": 2.7180954721817447, "grad_norm": 0.06468493185968716, "learning_rate": 9.035227896895515e-05, "loss": 0.8491, "step": 154820 }, { "epoch": 2.718271037061746, "grad_norm": 0.058565900532168674, "learning_rate": 9.034120004376018e-05, "loss": 0.8451, "step": 154830 }, { "epoch": 2.7184466019417477, "grad_norm": 0.051395010155118895, "learning_rate": 9.033012125361428e-05, "loss": 0.8348, "step": 154840 }, { "epoch": 2.718622166821749, "grad_norm": 0.05079786089220619, "learning_rate": 9.031904259865773e-05, "loss": 0.8381, "step": 154850 }, { "epoch": 2.71879773170175, "grad_norm": 0.06225748197008897, "learning_rate": 9.030796407903103e-05, "loss": 0.844, "step": 154860 }, { "epoch": 2.7189732965817517, "grad_norm": 0.07903351960489867, "learning_rate": 9.029688569487446e-05, "loss": 0.8371, "step": 154870 }, { "epoch": 2.719148861461753, "grad_norm": 0.06985732252741639, "learning_rate": 9.028580744632834e-05, "loss": 0.8406, "step": 154880 }, { "epoch": 2.7193244263417546, "grad_norm": 0.05579558246732594, "learning_rate": 9.027472933353314e-05, "loss": 0.8514, "step": 154890 }, { "epoch": 2.719499991221756, "grad_norm": 0.05597785421429863, "learning_rate": 9.026365135662918e-05, "loss": 0.8373, "step": 154900 }, { "epoch": 2.719675556101757, "grad_norm": 0.04847088340863627, "learning_rate": 9.02525735157568e-05, "loss": 0.8479, "step": 154910 }, { "epoch": 2.719851120981759, "grad_norm": 0.05033729156039539, "learning_rate": 9.024149581105637e-05, "loss": 0.8466, "step": 154920 }, { "epoch": 2.72002668586176, "grad_norm": 0.05889381389640331, "learning_rate": 9.023041824266826e-05, "loss": 0.8491, "step": 154930 }, { "epoch": 2.7202022507417616, "grad_norm": 0.048972518779509644, "learning_rate": 9.021934081073284e-05, "loss": 0.8462, "step": 154940 }, { "epoch": 2.720377815621763, "grad_norm": 0.06892656764609656, "learning_rate": 9.020826351539036e-05, "loss": 0.8357, "step": 154950 }, { "epoch": 2.7205533805017645, "grad_norm": 0.05836119327936013, "learning_rate": 9.01971863567813e-05, "loss": 0.8442, "step": 154960 }, { "epoch": 2.720728945381766, "grad_norm": 0.050887022714182044, "learning_rate": 9.018610933504597e-05, "loss": 0.8478, "step": 154970 }, { "epoch": 2.720904510261767, "grad_norm": 0.0664802956001212, "learning_rate": 9.017503245032472e-05, "loss": 0.8401, "step": 154980 }, { "epoch": 2.7210800751417685, "grad_norm": 0.043442031432604514, "learning_rate": 9.016395570275783e-05, "loss": 0.8412, "step": 154990 }, { "epoch": 2.72125564002177, "grad_norm": 0.06546213748836832, "learning_rate": 9.015287909248574e-05, "loss": 0.8484, "step": 155000 }, { "epoch": 2.7214312049017715, "grad_norm": 0.06110184837742583, "learning_rate": 9.014180261964874e-05, "loss": 0.8394, "step": 155010 }, { "epoch": 2.721606769781773, "grad_norm": 0.050507732489943155, "learning_rate": 9.013072628438714e-05, "loss": 0.8448, "step": 155020 }, { "epoch": 2.721782334661774, "grad_norm": 0.0581979095215319, "learning_rate": 9.01196500868414e-05, "loss": 0.8426, "step": 155030 }, { "epoch": 2.721957899541776, "grad_norm": 0.05624933325167508, "learning_rate": 9.010857402715175e-05, "loss": 0.8402, "step": 155040 }, { "epoch": 2.722133464421777, "grad_norm": 0.049470777589182516, "learning_rate": 9.00974981054585e-05, "loss": 0.8402, "step": 155050 }, { "epoch": 2.7223090293017784, "grad_norm": 0.052497273487474294, "learning_rate": 9.00864223219021e-05, "loss": 0.8449, "step": 155060 }, { "epoch": 2.72248459418178, "grad_norm": 0.07556388436896447, "learning_rate": 9.00753466766228e-05, "loss": 0.8481, "step": 155070 }, { "epoch": 2.7226601590617814, "grad_norm": 0.05991658900482496, "learning_rate": 9.006427116976096e-05, "loss": 0.8405, "step": 155080 }, { "epoch": 2.722835723941783, "grad_norm": 0.058074178570444945, "learning_rate": 9.005319580145689e-05, "loss": 0.8426, "step": 155090 }, { "epoch": 2.723011288821784, "grad_norm": 0.07190598762725503, "learning_rate": 9.004212057185094e-05, "loss": 0.843, "step": 155100 }, { "epoch": 2.7231868537017854, "grad_norm": 0.05507424892635152, "learning_rate": 9.003104548108342e-05, "loss": 0.8407, "step": 155110 }, { "epoch": 2.723362418581787, "grad_norm": 0.05173279103608182, "learning_rate": 9.001997052929462e-05, "loss": 0.8372, "step": 155120 }, { "epoch": 2.7235379834617883, "grad_norm": 0.061055240524716045, "learning_rate": 9.000889571662492e-05, "loss": 0.8406, "step": 155130 }, { "epoch": 2.72371354834179, "grad_norm": 0.043254969432970296, "learning_rate": 8.999782104321464e-05, "loss": 0.8473, "step": 155140 }, { "epoch": 2.723889113221791, "grad_norm": 0.060919102171730195, "learning_rate": 8.998674650920406e-05, "loss": 0.842, "step": 155150 }, { "epoch": 2.7240646781017928, "grad_norm": 0.08242381794614954, "learning_rate": 8.997567211473352e-05, "loss": 0.8401, "step": 155160 }, { "epoch": 2.724240242981794, "grad_norm": 0.0653946971094182, "learning_rate": 8.996459785994331e-05, "loss": 0.8475, "step": 155170 }, { "epoch": 2.7244158078617953, "grad_norm": 0.055461899277198655, "learning_rate": 8.995352374497379e-05, "loss": 0.8474, "step": 155180 }, { "epoch": 2.7245913727417967, "grad_norm": 0.05745037770802562, "learning_rate": 8.994244976996518e-05, "loss": 0.8461, "step": 155190 }, { "epoch": 2.7247669376217982, "grad_norm": 0.05625404812217857, "learning_rate": 8.993137593505792e-05, "loss": 0.8436, "step": 155200 }, { "epoch": 2.7249425025017997, "grad_norm": 0.054623190461124266, "learning_rate": 8.992030224039223e-05, "loss": 0.8433, "step": 155210 }, { "epoch": 2.7251180673818007, "grad_norm": 0.06151421648745013, "learning_rate": 8.990922868610839e-05, "loss": 0.8524, "step": 155220 }, { "epoch": 2.725293632261802, "grad_norm": 0.04503915917187927, "learning_rate": 8.989815527234679e-05, "loss": 0.84, "step": 155230 }, { "epoch": 2.7254691971418037, "grad_norm": 0.05607517504323106, "learning_rate": 8.988708199924769e-05, "loss": 0.8419, "step": 155240 }, { "epoch": 2.725644762021805, "grad_norm": 0.06294600607308953, "learning_rate": 8.98760088669514e-05, "loss": 0.8427, "step": 155250 }, { "epoch": 2.7258203269018066, "grad_norm": 0.05452832958601964, "learning_rate": 8.986493587559817e-05, "loss": 0.8497, "step": 155260 }, { "epoch": 2.725995891781808, "grad_norm": 0.04397536402781362, "learning_rate": 8.985386302532837e-05, "loss": 0.8503, "step": 155270 }, { "epoch": 2.7261714566618096, "grad_norm": 0.05458062418410865, "learning_rate": 8.984279031628226e-05, "loss": 0.8415, "step": 155280 }, { "epoch": 2.7263470215418106, "grad_norm": 0.048099606204296076, "learning_rate": 8.98317177486001e-05, "loss": 0.8466, "step": 155290 }, { "epoch": 2.726522586421812, "grad_norm": 0.07007090732710797, "learning_rate": 8.982064532242225e-05, "loss": 0.835, "step": 155300 }, { "epoch": 2.7266981513018136, "grad_norm": 0.05171950020902155, "learning_rate": 8.980957303788896e-05, "loss": 0.8416, "step": 155310 }, { "epoch": 2.726873716181815, "grad_norm": 0.06161040294120233, "learning_rate": 8.979850089514053e-05, "loss": 0.8622, "step": 155320 }, { "epoch": 2.7270492810618165, "grad_norm": 0.05070599793677813, "learning_rate": 8.978742889431722e-05, "loss": 0.8421, "step": 155330 }, { "epoch": 2.7272248459418176, "grad_norm": 0.048389361413623565, "learning_rate": 8.977635703555936e-05, "loss": 0.8414, "step": 155340 }, { "epoch": 2.727400410821819, "grad_norm": 0.05464806202077278, "learning_rate": 8.976528531900721e-05, "loss": 0.8466, "step": 155350 }, { "epoch": 2.7275759757018205, "grad_norm": 0.05289754476206841, "learning_rate": 8.9754213744801e-05, "loss": 0.8439, "step": 155360 }, { "epoch": 2.727751540581822, "grad_norm": 0.05558533668228626, "learning_rate": 8.974314231308112e-05, "loss": 0.8498, "step": 155370 }, { "epoch": 2.7279271054618235, "grad_norm": 0.04818793051177853, "learning_rate": 8.973207102398776e-05, "loss": 0.837, "step": 155380 }, { "epoch": 2.728102670341825, "grad_norm": 0.0649043875182662, "learning_rate": 8.972099987766118e-05, "loss": 0.8411, "step": 155390 }, { "epoch": 2.7282782352218264, "grad_norm": 0.06483816744848885, "learning_rate": 8.970992887424174e-05, "loss": 0.8465, "step": 155400 }, { "epoch": 2.7284538001018275, "grad_norm": 0.055823270132392, "learning_rate": 8.969885801386966e-05, "loss": 0.8444, "step": 155410 }, { "epoch": 2.728629364981829, "grad_norm": 0.054368723734964285, "learning_rate": 8.968778729668523e-05, "loss": 0.839, "step": 155420 }, { "epoch": 2.7288049298618304, "grad_norm": 0.05663005169252125, "learning_rate": 8.967671672282864e-05, "loss": 0.8449, "step": 155430 }, { "epoch": 2.728980494741832, "grad_norm": 0.06895950734263263, "learning_rate": 8.966564629244029e-05, "loss": 0.845, "step": 155440 }, { "epoch": 2.7291560596218334, "grad_norm": 0.055672556718884526, "learning_rate": 8.965457600566034e-05, "loss": 0.8445, "step": 155450 }, { "epoch": 2.7293316245018344, "grad_norm": 0.08338140227290301, "learning_rate": 8.964350586262906e-05, "loss": 0.8459, "step": 155460 }, { "epoch": 2.729507189381836, "grad_norm": 0.059259240157909156, "learning_rate": 8.963243586348678e-05, "loss": 0.8458, "step": 155470 }, { "epoch": 2.7296827542618374, "grad_norm": 0.042847228616197254, "learning_rate": 8.96213660083737e-05, "loss": 0.8457, "step": 155480 }, { "epoch": 2.729858319141839, "grad_norm": 0.05738965180078845, "learning_rate": 8.96102962974301e-05, "loss": 0.836, "step": 155490 }, { "epoch": 2.7300338840218403, "grad_norm": 0.05370951231062569, "learning_rate": 8.959922673079623e-05, "loss": 0.837, "step": 155500 }, { "epoch": 2.730209448901842, "grad_norm": 0.06208879139771057, "learning_rate": 8.958815730861234e-05, "loss": 0.8437, "step": 155510 }, { "epoch": 2.7303850137818433, "grad_norm": 0.0584387217890538, "learning_rate": 8.957708803101869e-05, "loss": 0.8373, "step": 155520 }, { "epoch": 2.7305605786618443, "grad_norm": 0.04399152521178906, "learning_rate": 8.956601889815548e-05, "loss": 0.8342, "step": 155530 }, { "epoch": 2.730736143541846, "grad_norm": 0.04686451625124202, "learning_rate": 8.955494991016306e-05, "loss": 0.8476, "step": 155540 }, { "epoch": 2.7309117084218473, "grad_norm": 0.046393777717331136, "learning_rate": 8.954388106718161e-05, "loss": 0.8456, "step": 155550 }, { "epoch": 2.7310872733018488, "grad_norm": 0.04912554402379017, "learning_rate": 8.953281236935134e-05, "loss": 0.84, "step": 155560 }, { "epoch": 2.7312628381818502, "grad_norm": 0.053350228866521625, "learning_rate": 8.952174381681256e-05, "loss": 0.8485, "step": 155570 }, { "epoch": 2.7314384030618513, "grad_norm": 0.06533487343853546, "learning_rate": 8.951067540970549e-05, "loss": 0.8421, "step": 155580 }, { "epoch": 2.731613967941853, "grad_norm": 0.0700306932837539, "learning_rate": 8.949960714817038e-05, "loss": 0.8499, "step": 155590 }, { "epoch": 2.731789532821854, "grad_norm": 0.05104891259343752, "learning_rate": 8.948853903234742e-05, "loss": 0.8305, "step": 155600 }, { "epoch": 2.7319650977018557, "grad_norm": 0.047832144949749494, "learning_rate": 8.94774710623769e-05, "loss": 0.8447, "step": 155610 }, { "epoch": 2.732140662581857, "grad_norm": 0.055310994109528805, "learning_rate": 8.946640323839904e-05, "loss": 0.8409, "step": 155620 }, { "epoch": 2.7323162274618586, "grad_norm": 0.07734682866456732, "learning_rate": 8.9455335560554e-05, "loss": 0.8338, "step": 155630 }, { "epoch": 2.73249179234186, "grad_norm": 0.053537042591836254, "learning_rate": 8.944426802898211e-05, "loss": 0.8433, "step": 155640 }, { "epoch": 2.732667357221861, "grad_norm": 0.058533522415928796, "learning_rate": 8.943320064382359e-05, "loss": 0.8455, "step": 155650 }, { "epoch": 2.7328429221018626, "grad_norm": 0.05015474357318129, "learning_rate": 8.94221334052186e-05, "loss": 0.8456, "step": 155660 }, { "epoch": 2.733018486981864, "grad_norm": 0.05569900684171495, "learning_rate": 8.941106631330739e-05, "loss": 0.8467, "step": 155670 }, { "epoch": 2.7331940518618656, "grad_norm": 0.05182997150490459, "learning_rate": 8.939999936823022e-05, "loss": 0.847, "step": 155680 }, { "epoch": 2.733369616741867, "grad_norm": 0.07281966369835097, "learning_rate": 8.938893257012729e-05, "loss": 0.8502, "step": 155690 }, { "epoch": 2.733545181621868, "grad_norm": 0.06346215987883515, "learning_rate": 8.937786591913874e-05, "loss": 0.8436, "step": 155700 }, { "epoch": 2.73372074650187, "grad_norm": 0.049231608894537066, "learning_rate": 8.936679941540494e-05, "loss": 0.8435, "step": 155710 }, { "epoch": 2.733896311381871, "grad_norm": 0.04847594238885205, "learning_rate": 8.935573305906598e-05, "loss": 0.842, "step": 155720 }, { "epoch": 2.7340718762618725, "grad_norm": 0.0809754880959855, "learning_rate": 8.93446668502621e-05, "loss": 0.8486, "step": 155730 }, { "epoch": 2.734247441141874, "grad_norm": 0.05156526571454091, "learning_rate": 8.933360078913354e-05, "loss": 0.8497, "step": 155740 }, { "epoch": 2.7344230060218755, "grad_norm": 0.05607785227017324, "learning_rate": 8.932253487582051e-05, "loss": 0.8447, "step": 155750 }, { "epoch": 2.734598570901877, "grad_norm": 0.05735395948213819, "learning_rate": 8.93114691104632e-05, "loss": 0.8431, "step": 155760 }, { "epoch": 2.734774135781878, "grad_norm": 0.05506475823682268, "learning_rate": 8.930040349320178e-05, "loss": 0.8423, "step": 155770 }, { "epoch": 2.7349497006618795, "grad_norm": 0.0815044412086397, "learning_rate": 8.928933802417653e-05, "loss": 0.8408, "step": 155780 }, { "epoch": 2.735125265541881, "grad_norm": 0.055793140469159834, "learning_rate": 8.92782727035276e-05, "loss": 0.8442, "step": 155790 }, { "epoch": 2.7353008304218824, "grad_norm": 0.052129389497533526, "learning_rate": 8.926720753139516e-05, "loss": 0.8412, "step": 155800 }, { "epoch": 2.735476395301884, "grad_norm": 0.06381819806493876, "learning_rate": 8.925614250791949e-05, "loss": 0.8441, "step": 155810 }, { "epoch": 2.735651960181885, "grad_norm": 0.05829510955535627, "learning_rate": 8.924507763324075e-05, "loss": 0.8318, "step": 155820 }, { "epoch": 2.735827525061887, "grad_norm": 0.06877609919058732, "learning_rate": 8.923401290749913e-05, "loss": 0.8423, "step": 155830 }, { "epoch": 2.736003089941888, "grad_norm": 0.04495651209178678, "learning_rate": 8.922294833083479e-05, "loss": 0.8472, "step": 155840 }, { "epoch": 2.7361786548218894, "grad_norm": 0.05446901243709873, "learning_rate": 8.921188390338798e-05, "loss": 0.8461, "step": 155850 }, { "epoch": 2.736354219701891, "grad_norm": 0.06843397846559066, "learning_rate": 8.920081962529888e-05, "loss": 0.8423, "step": 155860 }, { "epoch": 2.7365297845818923, "grad_norm": 0.054148378863888166, "learning_rate": 8.918975549670758e-05, "loss": 0.8422, "step": 155870 }, { "epoch": 2.736705349461894, "grad_norm": 0.05106995086345871, "learning_rate": 8.917869151775443e-05, "loss": 0.846, "step": 155880 }, { "epoch": 2.736880914341895, "grad_norm": 0.05542072775592845, "learning_rate": 8.91676276885795e-05, "loss": 0.8459, "step": 155890 }, { "epoch": 2.7370564792218963, "grad_norm": 0.05720098583548352, "learning_rate": 8.915656400932294e-05, "loss": 0.8378, "step": 155900 }, { "epoch": 2.737232044101898, "grad_norm": 0.05682918280022831, "learning_rate": 8.914550048012504e-05, "loss": 0.8408, "step": 155910 }, { "epoch": 2.7374076089818993, "grad_norm": 0.0611119123030408, "learning_rate": 8.91344371011259e-05, "loss": 0.8345, "step": 155920 }, { "epoch": 2.7375831738619008, "grad_norm": 0.07237699474408447, "learning_rate": 8.912337387246574e-05, "loss": 0.8423, "step": 155930 }, { "epoch": 2.7377587387419022, "grad_norm": 0.06357118415718814, "learning_rate": 8.911231079428467e-05, "loss": 0.8408, "step": 155940 }, { "epoch": 2.7379343036219037, "grad_norm": 0.06597580049400058, "learning_rate": 8.910124786672293e-05, "loss": 0.8363, "step": 155950 }, { "epoch": 2.7381098685019047, "grad_norm": 0.049843542283602875, "learning_rate": 8.909018508992065e-05, "loss": 0.8378, "step": 155960 }, { "epoch": 2.738285433381906, "grad_norm": 0.05604466003006176, "learning_rate": 8.907912246401798e-05, "loss": 0.8486, "step": 155970 }, { "epoch": 2.7384609982619077, "grad_norm": 0.05751373619544122, "learning_rate": 8.906805998915514e-05, "loss": 0.8455, "step": 155980 }, { "epoch": 2.738636563141909, "grad_norm": 0.054044567011016516, "learning_rate": 8.905699766547229e-05, "loss": 0.8473, "step": 155990 }, { "epoch": 2.7388121280219107, "grad_norm": 0.05910728866014906, "learning_rate": 8.904593549310954e-05, "loss": 0.8403, "step": 156000 }, { "epoch": 2.7389876929019117, "grad_norm": 0.05947345267166262, "learning_rate": 8.903487347220708e-05, "loss": 0.8399, "step": 156010 }, { "epoch": 2.739163257781913, "grad_norm": 0.06426089564717849, "learning_rate": 8.902381160290507e-05, "loss": 0.8327, "step": 156020 }, { "epoch": 2.7393388226619146, "grad_norm": 0.07578030329053079, "learning_rate": 8.901274988534369e-05, "loss": 0.842, "step": 156030 }, { "epoch": 2.739514387541916, "grad_norm": 0.06627332719167481, "learning_rate": 8.9001688319663e-05, "loss": 0.841, "step": 156040 }, { "epoch": 2.7396899524219176, "grad_norm": 0.05394894911142923, "learning_rate": 8.899062690600327e-05, "loss": 0.8404, "step": 156050 }, { "epoch": 2.739865517301919, "grad_norm": 0.05625012983921044, "learning_rate": 8.89795656445046e-05, "loss": 0.8429, "step": 156060 }, { "epoch": 2.7400410821819206, "grad_norm": 0.05269143304418486, "learning_rate": 8.896850453530708e-05, "loss": 0.847, "step": 156070 }, { "epoch": 2.7402166470619216, "grad_norm": 0.06191090381131492, "learning_rate": 8.895744357855097e-05, "loss": 0.8461, "step": 156080 }, { "epoch": 2.740392211941923, "grad_norm": 0.07081879408244118, "learning_rate": 8.894638277437636e-05, "loss": 0.8443, "step": 156090 }, { "epoch": 2.7405677768219245, "grad_norm": 0.0541432659240325, "learning_rate": 8.893532212292338e-05, "loss": 0.8481, "step": 156100 }, { "epoch": 2.740743341701926, "grad_norm": 0.0668375699844316, "learning_rate": 8.892426162433216e-05, "loss": 0.8465, "step": 156110 }, { "epoch": 2.7409189065819275, "grad_norm": 0.047143997207747486, "learning_rate": 8.891320127874288e-05, "loss": 0.8526, "step": 156120 }, { "epoch": 2.7410944714619285, "grad_norm": 0.05284006097686649, "learning_rate": 8.890214108629567e-05, "loss": 0.8354, "step": 156130 }, { "epoch": 2.74127003634193, "grad_norm": 0.04994521888271643, "learning_rate": 8.88910810471306e-05, "loss": 0.8391, "step": 156140 }, { "epoch": 2.7414456012219315, "grad_norm": 0.03830859375189048, "learning_rate": 8.88800211613879e-05, "loss": 0.8447, "step": 156150 }, { "epoch": 2.741621166101933, "grad_norm": 0.05176914136916239, "learning_rate": 8.886896142920764e-05, "loss": 0.8476, "step": 156160 }, { "epoch": 2.7417967309819344, "grad_norm": 0.06959860141322372, "learning_rate": 8.885790185072998e-05, "loss": 0.837, "step": 156170 }, { "epoch": 2.741972295861936, "grad_norm": 0.050606326381222005, "learning_rate": 8.884684242609498e-05, "loss": 0.8484, "step": 156180 }, { "epoch": 2.7421478607419374, "grad_norm": 0.04627675237209581, "learning_rate": 8.883578315544288e-05, "loss": 0.843, "step": 156190 }, { "epoch": 2.7423234256219384, "grad_norm": 0.04876315799200678, "learning_rate": 8.882472403891369e-05, "loss": 0.842, "step": 156200 }, { "epoch": 2.74249899050194, "grad_norm": 0.05987493070261072, "learning_rate": 8.88136650766476e-05, "loss": 0.8409, "step": 156210 }, { "epoch": 2.7426745553819414, "grad_norm": 0.05420106014359952, "learning_rate": 8.880260626878472e-05, "loss": 0.8502, "step": 156220 }, { "epoch": 2.742850120261943, "grad_norm": 0.06001126912379015, "learning_rate": 8.879154761546515e-05, "loss": 0.8508, "step": 156230 }, { "epoch": 2.7430256851419443, "grad_norm": 0.06678362699181668, "learning_rate": 8.878048911682896e-05, "loss": 0.8398, "step": 156240 }, { "epoch": 2.7432012500219454, "grad_norm": 0.07126025953455313, "learning_rate": 8.876943077301638e-05, "loss": 0.8459, "step": 156250 }, { "epoch": 2.7433768149019473, "grad_norm": 0.1056210565719668, "learning_rate": 8.875837258416745e-05, "loss": 0.8433, "step": 156260 }, { "epoch": 2.7435523797819483, "grad_norm": 0.04534477382793864, "learning_rate": 8.874731455042227e-05, "loss": 0.845, "step": 156270 }, { "epoch": 2.74372794466195, "grad_norm": 0.04418312841552144, "learning_rate": 8.873625667192095e-05, "loss": 0.8475, "step": 156280 }, { "epoch": 2.7439035095419513, "grad_norm": 0.05737372577090714, "learning_rate": 8.872519894880363e-05, "loss": 0.8423, "step": 156290 }, { "epoch": 2.7440790744219528, "grad_norm": 0.055396026150413516, "learning_rate": 8.87141413812104e-05, "loss": 0.8354, "step": 156300 }, { "epoch": 2.7442546393019542, "grad_norm": 0.06676750622489885, "learning_rate": 8.870308396928131e-05, "loss": 0.8437, "step": 156310 }, { "epoch": 2.7444302041819553, "grad_norm": 0.05805953865870403, "learning_rate": 8.869202671315655e-05, "loss": 0.8519, "step": 156320 }, { "epoch": 2.7446057690619567, "grad_norm": 0.0514987843601286, "learning_rate": 8.868096961297616e-05, "loss": 0.8422, "step": 156330 }, { "epoch": 2.7447813339419582, "grad_norm": 0.06266746436985629, "learning_rate": 8.866991266888022e-05, "loss": 0.8368, "step": 156340 }, { "epoch": 2.7449568988219597, "grad_norm": 0.047973759082011166, "learning_rate": 8.865885588100886e-05, "loss": 0.8418, "step": 156350 }, { "epoch": 2.745132463701961, "grad_norm": 0.04782082359751911, "learning_rate": 8.864779924950219e-05, "loss": 0.84, "step": 156360 }, { "epoch": 2.745308028581962, "grad_norm": 0.05674484740447128, "learning_rate": 8.863674277450024e-05, "loss": 0.8456, "step": 156370 }, { "epoch": 2.745483593461964, "grad_norm": 0.0569024146546244, "learning_rate": 8.862568645614311e-05, "loss": 0.8436, "step": 156380 }, { "epoch": 2.745659158341965, "grad_norm": 0.05027813097255785, "learning_rate": 8.861463029457096e-05, "loss": 0.8348, "step": 156390 }, { "epoch": 2.7458347232219666, "grad_norm": 0.060073598166479066, "learning_rate": 8.86035742899238e-05, "loss": 0.8456, "step": 156400 }, { "epoch": 2.746010288101968, "grad_norm": 0.0596709515455734, "learning_rate": 8.859251844234168e-05, "loss": 0.8471, "step": 156410 }, { "epoch": 2.7461858529819696, "grad_norm": 0.04947402463879879, "learning_rate": 8.858146275196478e-05, "loss": 0.8446, "step": 156420 }, { "epoch": 2.746361417861971, "grad_norm": 0.06210672041204684, "learning_rate": 8.857040721893312e-05, "loss": 0.838, "step": 156430 }, { "epoch": 2.746536982741972, "grad_norm": 0.052808262221882826, "learning_rate": 8.855935184338679e-05, "loss": 0.8377, "step": 156440 }, { "epoch": 2.7467125476219736, "grad_norm": 0.05880272516152189, "learning_rate": 8.854829662546581e-05, "loss": 0.8465, "step": 156450 }, { "epoch": 2.746888112501975, "grad_norm": 0.052288304081444525, "learning_rate": 8.853724156531035e-05, "loss": 0.8484, "step": 156460 }, { "epoch": 2.7470636773819765, "grad_norm": 0.058251935977457314, "learning_rate": 8.852618666306041e-05, "loss": 0.8412, "step": 156470 }, { "epoch": 2.747239242261978, "grad_norm": 0.053060096089677, "learning_rate": 8.851513191885605e-05, "loss": 0.8457, "step": 156480 }, { "epoch": 2.747414807141979, "grad_norm": 0.06494868513058978, "learning_rate": 8.85040773328374e-05, "loss": 0.8395, "step": 156490 }, { "epoch": 2.747590372021981, "grad_norm": 0.04466637518146537, "learning_rate": 8.849302290514448e-05, "loss": 0.8411, "step": 156500 }, { "epoch": 2.747765936901982, "grad_norm": 0.05370407867000349, "learning_rate": 8.848196863591736e-05, "loss": 0.8382, "step": 156510 }, { "epoch": 2.7479415017819835, "grad_norm": 0.06004801133990986, "learning_rate": 8.847091452529607e-05, "loss": 0.8451, "step": 156520 }, { "epoch": 2.748117066661985, "grad_norm": 0.04445700802535055, "learning_rate": 8.845986057342071e-05, "loss": 0.8409, "step": 156530 }, { "epoch": 2.7482926315419864, "grad_norm": 0.04991079496011218, "learning_rate": 8.844880678043132e-05, "loss": 0.8421, "step": 156540 }, { "epoch": 2.748468196421988, "grad_norm": 0.07338296781473577, "learning_rate": 8.843775314646793e-05, "loss": 0.8543, "step": 156550 }, { "epoch": 2.748643761301989, "grad_norm": 0.04806306409552457, "learning_rate": 8.842669967167067e-05, "loss": 0.8414, "step": 156560 }, { "epoch": 2.7488193261819904, "grad_norm": 0.051368725857169145, "learning_rate": 8.84156463561795e-05, "loss": 0.849, "step": 156570 }, { "epoch": 2.748994891061992, "grad_norm": 0.0470505692629954, "learning_rate": 8.840459320013448e-05, "loss": 0.8395, "step": 156580 }, { "epoch": 2.7491704559419934, "grad_norm": 0.050394502009126846, "learning_rate": 8.83935402036757e-05, "loss": 0.8435, "step": 156590 }, { "epoch": 2.749346020821995, "grad_norm": 0.05864527602745477, "learning_rate": 8.83824873669432e-05, "loss": 0.8402, "step": 156600 }, { "epoch": 2.749521585701996, "grad_norm": 0.04859245680460384, "learning_rate": 8.837143469007701e-05, "loss": 0.8467, "step": 156610 }, { "epoch": 2.749697150581998, "grad_norm": 0.045091030493305234, "learning_rate": 8.83603821732171e-05, "loss": 0.8425, "step": 156620 }, { "epoch": 2.749872715461999, "grad_norm": 0.047902014995531324, "learning_rate": 8.834932981650362e-05, "loss": 0.8453, "step": 156630 }, { "epoch": 2.7500482803420003, "grad_norm": 0.05369132229194304, "learning_rate": 8.833827762007657e-05, "loss": 0.8397, "step": 156640 }, { "epoch": 2.750223845222002, "grad_norm": 0.05334721045150262, "learning_rate": 8.83272255840759e-05, "loss": 0.8424, "step": 156650 }, { "epoch": 2.7503994101020033, "grad_norm": 0.04688574357041273, "learning_rate": 8.831617370864176e-05, "loss": 0.8401, "step": 156660 }, { "epoch": 2.7505749749820048, "grad_norm": 0.08905608579105294, "learning_rate": 8.830512199391415e-05, "loss": 0.8543, "step": 156670 }, { "epoch": 2.750750539862006, "grad_norm": 0.06278228671775822, "learning_rate": 8.829407044003304e-05, "loss": 0.8422, "step": 156680 }, { "epoch": 2.7509261047420073, "grad_norm": 0.047434056972916534, "learning_rate": 8.828301904713851e-05, "loss": 0.8375, "step": 156690 }, { "epoch": 2.7511016696220087, "grad_norm": 0.07986100122579445, "learning_rate": 8.827196781537057e-05, "loss": 0.8375, "step": 156700 }, { "epoch": 2.7512772345020102, "grad_norm": 0.06616294229888202, "learning_rate": 8.826091674486924e-05, "loss": 0.8452, "step": 156710 }, { "epoch": 2.7514527993820117, "grad_norm": 0.057503881866973694, "learning_rate": 8.824986583577448e-05, "loss": 0.8355, "step": 156720 }, { "epoch": 2.751628364262013, "grad_norm": 0.051803196903713045, "learning_rate": 8.823881508822645e-05, "loss": 0.8392, "step": 156730 }, { "epoch": 2.7518039291420147, "grad_norm": 0.05285531775128753, "learning_rate": 8.822776450236504e-05, "loss": 0.8471, "step": 156740 }, { "epoch": 2.7519794940220157, "grad_norm": 0.07578620787157585, "learning_rate": 8.821671407833026e-05, "loss": 0.845, "step": 156750 }, { "epoch": 2.752155058902017, "grad_norm": 0.05122256202584284, "learning_rate": 8.820566381626221e-05, "loss": 0.8402, "step": 156760 }, { "epoch": 2.7523306237820186, "grad_norm": 0.0633493029032213, "learning_rate": 8.819461371630086e-05, "loss": 0.8456, "step": 156770 }, { "epoch": 2.75250618866202, "grad_norm": 0.04622485553071532, "learning_rate": 8.81835637785862e-05, "loss": 0.8481, "step": 156780 }, { "epoch": 2.7526817535420216, "grad_norm": 0.04610272840477818, "learning_rate": 8.817251400325822e-05, "loss": 0.84, "step": 156790 }, { "epoch": 2.7528573184220226, "grad_norm": 0.04906004463388337, "learning_rate": 8.816146439045695e-05, "loss": 0.8369, "step": 156800 }, { "epoch": 2.753032883302024, "grad_norm": 0.06653329032289927, "learning_rate": 8.815041494032242e-05, "loss": 0.8436, "step": 156810 }, { "epoch": 2.7532084481820256, "grad_norm": 0.050302624468321545, "learning_rate": 8.813936565299453e-05, "loss": 0.8474, "step": 156820 }, { "epoch": 2.753384013062027, "grad_norm": 0.051971886103538985, "learning_rate": 8.81283165286134e-05, "loss": 0.8442, "step": 156830 }, { "epoch": 2.7535595779420285, "grad_norm": 0.07088303134988598, "learning_rate": 8.811726756731897e-05, "loss": 0.8439, "step": 156840 }, { "epoch": 2.75373514282203, "grad_norm": 0.06503991474092136, "learning_rate": 8.810621876925121e-05, "loss": 0.8467, "step": 156850 }, { "epoch": 2.7539107077020315, "grad_norm": 0.05244304440489128, "learning_rate": 8.809517013455011e-05, "loss": 0.8502, "step": 156860 }, { "epoch": 2.7540862725820325, "grad_norm": 0.05436677783201485, "learning_rate": 8.80841216633557e-05, "loss": 0.838, "step": 156870 }, { "epoch": 2.754261837462034, "grad_norm": 0.07584943053362546, "learning_rate": 8.807307335580795e-05, "loss": 0.8452, "step": 156880 }, { "epoch": 2.7544374023420355, "grad_norm": 0.05538641589503444, "learning_rate": 8.806202521204679e-05, "loss": 0.8413, "step": 156890 }, { "epoch": 2.754612967222037, "grad_norm": 0.06514015941031055, "learning_rate": 8.805097723221232e-05, "loss": 0.8413, "step": 156900 }, { "epoch": 2.7547885321020384, "grad_norm": 0.06654063200553595, "learning_rate": 8.803992941644443e-05, "loss": 0.8432, "step": 156910 }, { "epoch": 2.7549640969820395, "grad_norm": 0.068562819275101, "learning_rate": 8.802888176488307e-05, "loss": 0.8491, "step": 156920 }, { "epoch": 2.755139661862041, "grad_norm": 0.08178585288875613, "learning_rate": 8.80178342776683e-05, "loss": 0.8402, "step": 156930 }, { "epoch": 2.7553152267420424, "grad_norm": 0.0525003098354295, "learning_rate": 8.800678695494008e-05, "loss": 0.8472, "step": 156940 }, { "epoch": 2.755490791622044, "grad_norm": 0.04093625254925667, "learning_rate": 8.799573979683833e-05, "loss": 0.8402, "step": 156950 }, { "epoch": 2.7556663565020454, "grad_norm": 0.06955406499657853, "learning_rate": 8.798469280350306e-05, "loss": 0.8436, "step": 156960 }, { "epoch": 2.755841921382047, "grad_norm": 0.06647004199709922, "learning_rate": 8.797364597507422e-05, "loss": 0.8446, "step": 156970 }, { "epoch": 2.7560174862620483, "grad_norm": 0.11430824667153548, "learning_rate": 8.79625993116918e-05, "loss": 0.8476, "step": 156980 }, { "epoch": 2.7561930511420494, "grad_norm": 0.07522935996864957, "learning_rate": 8.79515528134957e-05, "loss": 0.8408, "step": 156990 }, { "epoch": 2.756368616022051, "grad_norm": 0.073714792330851, "learning_rate": 8.794050648062595e-05, "loss": 0.8404, "step": 157000 }, { "epoch": 2.7565441809020523, "grad_norm": 0.05562373078891054, "learning_rate": 8.792946031322251e-05, "loss": 0.847, "step": 157010 }, { "epoch": 2.756719745782054, "grad_norm": 0.05551150272161376, "learning_rate": 8.79184143114253e-05, "loss": 0.8412, "step": 157020 }, { "epoch": 2.7568953106620553, "grad_norm": 0.05464405378540042, "learning_rate": 8.790736847537427e-05, "loss": 0.8406, "step": 157030 }, { "epoch": 2.7570708755420563, "grad_norm": 0.04482463921532193, "learning_rate": 8.789632280520942e-05, "loss": 0.8362, "step": 157040 }, { "epoch": 2.7572464404220582, "grad_norm": 0.06976320978903298, "learning_rate": 8.788527730107066e-05, "loss": 0.8454, "step": 157050 }, { "epoch": 2.7574220053020593, "grad_norm": 0.0583763613773101, "learning_rate": 8.78742319630979e-05, "loss": 0.8437, "step": 157060 }, { "epoch": 2.7575975701820608, "grad_norm": 0.04892111168315456, "learning_rate": 8.786318679143122e-05, "loss": 0.8388, "step": 157070 }, { "epoch": 2.7577731350620622, "grad_norm": 0.047853255734508104, "learning_rate": 8.785214178621046e-05, "loss": 0.8438, "step": 157080 }, { "epoch": 2.7579486999420637, "grad_norm": 0.06860882352644158, "learning_rate": 8.784109694757553e-05, "loss": 0.8439, "step": 157090 }, { "epoch": 2.758124264822065, "grad_norm": 0.04504324988952119, "learning_rate": 8.783005227566647e-05, "loss": 0.8383, "step": 157100 }, { "epoch": 2.758299829702066, "grad_norm": 0.053814224615869904, "learning_rate": 8.781900777062317e-05, "loss": 0.8425, "step": 157110 }, { "epoch": 2.7584753945820677, "grad_norm": 0.07253287428485908, "learning_rate": 8.780796343258556e-05, "loss": 0.8498, "step": 157120 }, { "epoch": 2.758650959462069, "grad_norm": 0.05968525914165523, "learning_rate": 8.779691926169356e-05, "loss": 0.8403, "step": 157130 }, { "epoch": 2.7588265243420707, "grad_norm": 0.0669612503730494, "learning_rate": 8.778587525808716e-05, "loss": 0.8369, "step": 157140 }, { "epoch": 2.759002089222072, "grad_norm": 0.0669846889969934, "learning_rate": 8.777483142190625e-05, "loss": 0.8388, "step": 157150 }, { "epoch": 2.759177654102073, "grad_norm": 0.055318561655830184, "learning_rate": 8.77637877532907e-05, "loss": 0.8492, "step": 157160 }, { "epoch": 2.759353218982075, "grad_norm": 0.05446823527924726, "learning_rate": 8.775274425238056e-05, "loss": 0.8359, "step": 157170 }, { "epoch": 2.759528783862076, "grad_norm": 0.05643128989196942, "learning_rate": 8.774170091931568e-05, "loss": 0.849, "step": 157180 }, { "epoch": 2.7597043487420776, "grad_norm": 0.044843689263744775, "learning_rate": 8.7730657754236e-05, "loss": 0.8426, "step": 157190 }, { "epoch": 2.759879913622079, "grad_norm": 0.05628389102890671, "learning_rate": 8.771961475728138e-05, "loss": 0.8507, "step": 157200 }, { "epoch": 2.7600554785020806, "grad_norm": 0.06972056383216203, "learning_rate": 8.770857192859183e-05, "loss": 0.847, "step": 157210 }, { "epoch": 2.760231043382082, "grad_norm": 0.04893008537970542, "learning_rate": 8.769752926830722e-05, "loss": 0.8432, "step": 157220 }, { "epoch": 2.760406608262083, "grad_norm": 0.06530181723082469, "learning_rate": 8.768648677656742e-05, "loss": 0.8489, "step": 157230 }, { "epoch": 2.7605821731420845, "grad_norm": 0.058839535662014816, "learning_rate": 8.767544445351245e-05, "loss": 0.843, "step": 157240 }, { "epoch": 2.760757738022086, "grad_norm": 0.08069468431956432, "learning_rate": 8.766440229928212e-05, "loss": 0.8556, "step": 157250 }, { "epoch": 2.7609333029020875, "grad_norm": 0.0491133514494684, "learning_rate": 8.765336031401634e-05, "loss": 0.8434, "step": 157260 }, { "epoch": 2.761108867782089, "grad_norm": 0.052770297142934804, "learning_rate": 8.764231849785507e-05, "loss": 0.8442, "step": 157270 }, { "epoch": 2.76128443266209, "grad_norm": 0.04449842375501373, "learning_rate": 8.76312768509382e-05, "loss": 0.8433, "step": 157280 }, { "epoch": 2.761459997542092, "grad_norm": 0.060483888501226206, "learning_rate": 8.76202353734056e-05, "loss": 0.8428, "step": 157290 }, { "epoch": 2.761635562422093, "grad_norm": 0.0590747816071487, "learning_rate": 8.760919406539717e-05, "loss": 0.8501, "step": 157300 }, { "epoch": 2.7618111273020944, "grad_norm": 0.06594742633754799, "learning_rate": 8.759815292705284e-05, "loss": 0.8402, "step": 157310 }, { "epoch": 2.761986692182096, "grad_norm": 0.07461783835545312, "learning_rate": 8.758711195851248e-05, "loss": 0.8476, "step": 157320 }, { "epoch": 2.7621622570620974, "grad_norm": 0.05364486655179432, "learning_rate": 8.757607115991593e-05, "loss": 0.8359, "step": 157330 }, { "epoch": 2.762337821942099, "grad_norm": 0.05024527387530762, "learning_rate": 8.756503053140319e-05, "loss": 0.8483, "step": 157340 }, { "epoch": 2.7625133868221, "grad_norm": 0.052916735615289964, "learning_rate": 8.755399007311408e-05, "loss": 0.8359, "step": 157350 }, { "epoch": 2.7626889517021014, "grad_norm": 0.04926095648069745, "learning_rate": 8.754294978518848e-05, "loss": 0.8428, "step": 157360 }, { "epoch": 2.762864516582103, "grad_norm": 0.050011426679076196, "learning_rate": 8.753190966776628e-05, "loss": 0.8442, "step": 157370 }, { "epoch": 2.7630400814621043, "grad_norm": 0.05024618282544578, "learning_rate": 8.752086972098738e-05, "loss": 0.8437, "step": 157380 }, { "epoch": 2.763215646342106, "grad_norm": 0.06277252324111474, "learning_rate": 8.750982994499166e-05, "loss": 0.847, "step": 157390 }, { "epoch": 2.7633912112221073, "grad_norm": 0.05170288840330667, "learning_rate": 8.749879033991893e-05, "loss": 0.8341, "step": 157400 }, { "epoch": 2.7635667761021088, "grad_norm": 0.05377042858591191, "learning_rate": 8.748775090590916e-05, "loss": 0.8407, "step": 157410 }, { "epoch": 2.76374234098211, "grad_norm": 0.08202363213430908, "learning_rate": 8.747671164310215e-05, "loss": 0.8434, "step": 157420 }, { "epoch": 2.7639179058621113, "grad_norm": 0.04649054107016494, "learning_rate": 8.746567255163778e-05, "loss": 0.8488, "step": 157430 }, { "epoch": 2.7640934707421128, "grad_norm": 0.0605360250064372, "learning_rate": 8.745463363165597e-05, "loss": 0.8428, "step": 157440 }, { "epoch": 2.7642690356221142, "grad_norm": 0.07152242211938314, "learning_rate": 8.744359488329654e-05, "loss": 0.8411, "step": 157450 }, { "epoch": 2.7644446005021157, "grad_norm": 0.05281659293114526, "learning_rate": 8.743255630669937e-05, "loss": 0.8421, "step": 157460 }, { "epoch": 2.7646201653821167, "grad_norm": 0.05494862875335562, "learning_rate": 8.742151790200428e-05, "loss": 0.841, "step": 157470 }, { "epoch": 2.7647957302621182, "grad_norm": 0.06206497033131458, "learning_rate": 8.741047966935119e-05, "loss": 0.8491, "step": 157480 }, { "epoch": 2.7649712951421197, "grad_norm": 0.07548319024096652, "learning_rate": 8.739944160887991e-05, "loss": 0.8473, "step": 157490 }, { "epoch": 2.765146860022121, "grad_norm": 0.057634523609308166, "learning_rate": 8.738840372073028e-05, "loss": 0.8477, "step": 157500 }, { "epoch": 2.7653224249021227, "grad_norm": 0.04532186700354208, "learning_rate": 8.737736600504223e-05, "loss": 0.8392, "step": 157510 }, { "epoch": 2.765497989782124, "grad_norm": 0.06886100925686674, "learning_rate": 8.736632846195555e-05, "loss": 0.8386, "step": 157520 }, { "epoch": 2.7656735546621256, "grad_norm": 0.04977441439542064, "learning_rate": 8.735529109161012e-05, "loss": 0.8501, "step": 157530 }, { "epoch": 2.7658491195421266, "grad_norm": 0.07685475688445494, "learning_rate": 8.734425389414573e-05, "loss": 0.8402, "step": 157540 }, { "epoch": 2.766024684422128, "grad_norm": 0.05175423923634387, "learning_rate": 8.733321686970229e-05, "loss": 0.8388, "step": 157550 }, { "epoch": 2.7662002493021296, "grad_norm": 0.10521635639912774, "learning_rate": 8.732218001841963e-05, "loss": 0.8296, "step": 157560 }, { "epoch": 2.766375814182131, "grad_norm": 0.053390976242546635, "learning_rate": 8.73111433404375e-05, "loss": 0.844, "step": 157570 }, { "epoch": 2.7665513790621326, "grad_norm": 0.0565352904652092, "learning_rate": 8.730010683589588e-05, "loss": 0.8497, "step": 157580 }, { "epoch": 2.7667269439421336, "grad_norm": 0.06787607211849983, "learning_rate": 8.728907050493452e-05, "loss": 0.8488, "step": 157590 }, { "epoch": 2.766902508822135, "grad_norm": 0.04916681965352802, "learning_rate": 8.72780343476932e-05, "loss": 0.8376, "step": 157600 }, { "epoch": 2.7670780737021365, "grad_norm": 0.05663280667059593, "learning_rate": 8.726699836431186e-05, "loss": 0.8393, "step": 157610 }, { "epoch": 2.767253638582138, "grad_norm": 0.05316591462715608, "learning_rate": 8.725596255493031e-05, "loss": 0.8396, "step": 157620 }, { "epoch": 2.7674292034621395, "grad_norm": 0.05387106125260006, "learning_rate": 8.724492691968834e-05, "loss": 0.8463, "step": 157630 }, { "epoch": 2.767604768342141, "grad_norm": 0.04306219140455374, "learning_rate": 8.723389145872572e-05, "loss": 0.8434, "step": 157640 }, { "epoch": 2.7677803332221425, "grad_norm": 0.054590018581044994, "learning_rate": 8.722285617218239e-05, "loss": 0.8438, "step": 157650 }, { "epoch": 2.7679558981021435, "grad_norm": 0.04035791602368389, "learning_rate": 8.721182106019812e-05, "loss": 0.84, "step": 157660 }, { "epoch": 2.768131462982145, "grad_norm": 0.048682997848577644, "learning_rate": 8.720078612291266e-05, "loss": 0.8329, "step": 157670 }, { "epoch": 2.7683070278621464, "grad_norm": 0.05092098623382221, "learning_rate": 8.718975136046593e-05, "loss": 0.8346, "step": 157680 }, { "epoch": 2.768482592742148, "grad_norm": 0.07207592967554345, "learning_rate": 8.717871677299771e-05, "loss": 0.8388, "step": 157690 }, { "epoch": 2.7686581576221494, "grad_norm": 0.0560342206135456, "learning_rate": 8.716768236064778e-05, "loss": 0.8358, "step": 157700 }, { "epoch": 2.7688337225021504, "grad_norm": 0.06303645839217051, "learning_rate": 8.715664812355595e-05, "loss": 0.8419, "step": 157710 }, { "epoch": 2.7690092873821524, "grad_norm": 0.04731197077963036, "learning_rate": 8.714561406186206e-05, "loss": 0.8441, "step": 157720 }, { "epoch": 2.7691848522621534, "grad_norm": 0.057166184081497706, "learning_rate": 8.713458017570591e-05, "loss": 0.8487, "step": 157730 }, { "epoch": 2.769360417142155, "grad_norm": 0.04614596029528185, "learning_rate": 8.712354646522722e-05, "loss": 0.8493, "step": 157740 }, { "epoch": 2.7695359820221563, "grad_norm": 0.05682703239556557, "learning_rate": 8.711251293056594e-05, "loss": 0.843, "step": 157750 }, { "epoch": 2.769711546902158, "grad_norm": 0.0638358810620907, "learning_rate": 8.710147957186176e-05, "loss": 0.8332, "step": 157760 }, { "epoch": 2.7698871117821593, "grad_norm": 0.06717455383836266, "learning_rate": 8.709044638925443e-05, "loss": 0.8433, "step": 157770 }, { "epoch": 2.7700626766621603, "grad_norm": 0.06178642902593861, "learning_rate": 8.707941338288388e-05, "loss": 0.843, "step": 157780 }, { "epoch": 2.770238241542162, "grad_norm": 0.05202276328891303, "learning_rate": 8.706838055288983e-05, "loss": 0.8389, "step": 157790 }, { "epoch": 2.7704138064221633, "grad_norm": 0.06118544534581905, "learning_rate": 8.705734789941205e-05, "loss": 0.8421, "step": 157800 }, { "epoch": 2.7705893713021648, "grad_norm": 0.0479883874439823, "learning_rate": 8.704631542259033e-05, "loss": 0.8507, "step": 157810 }, { "epoch": 2.7707649361821662, "grad_norm": 0.06252791121061436, "learning_rate": 8.70352831225645e-05, "loss": 0.847, "step": 157820 }, { "epoch": 2.7709405010621673, "grad_norm": 0.06868476888808146, "learning_rate": 8.70242509994743e-05, "loss": 0.843, "step": 157830 }, { "epoch": 2.771116065942169, "grad_norm": 0.06370421750646033, "learning_rate": 8.701321905345947e-05, "loss": 0.8463, "step": 157840 }, { "epoch": 2.7712916308221702, "grad_norm": 0.0483724568502102, "learning_rate": 8.700218728465989e-05, "loss": 0.8379, "step": 157850 }, { "epoch": 2.7714671957021717, "grad_norm": 0.04462636167297381, "learning_rate": 8.699115569321526e-05, "loss": 0.837, "step": 157860 }, { "epoch": 2.771642760582173, "grad_norm": 0.060898790616022776, "learning_rate": 8.69801242792654e-05, "loss": 0.8424, "step": 157870 }, { "epoch": 2.7718183254621747, "grad_norm": 0.06995987591454805, "learning_rate": 8.696909304295001e-05, "loss": 0.8429, "step": 157880 }, { "epoch": 2.771993890342176, "grad_norm": 0.057444182390312466, "learning_rate": 8.695806198440893e-05, "loss": 0.8394, "step": 157890 }, { "epoch": 2.772169455222177, "grad_norm": 0.04450752316775776, "learning_rate": 8.694703110378191e-05, "loss": 0.8425, "step": 157900 }, { "epoch": 2.7723450201021786, "grad_norm": 0.05448558014265505, "learning_rate": 8.693600040120863e-05, "loss": 0.8397, "step": 157910 }, { "epoch": 2.77252058498218, "grad_norm": 0.04644179176854391, "learning_rate": 8.6924969876829e-05, "loss": 0.8433, "step": 157920 }, { "epoch": 2.7726961498621816, "grad_norm": 0.062290138087552085, "learning_rate": 8.691393953078267e-05, "loss": 0.8425, "step": 157930 }, { "epoch": 2.772871714742183, "grad_norm": 0.05636090416898908, "learning_rate": 8.690290936320937e-05, "loss": 0.8481, "step": 157940 }, { "epoch": 2.773047279622184, "grad_norm": 0.05149563146390951, "learning_rate": 8.689187937424899e-05, "loss": 0.8479, "step": 157950 }, { "epoch": 2.773222844502186, "grad_norm": 0.04347969033298756, "learning_rate": 8.688084956404117e-05, "loss": 0.8382, "step": 157960 }, { "epoch": 2.773398409382187, "grad_norm": 0.05143316475387085, "learning_rate": 8.686981993272572e-05, "loss": 0.8414, "step": 157970 }, { "epoch": 2.7735739742621885, "grad_norm": 0.05669151626306484, "learning_rate": 8.68587904804423e-05, "loss": 0.8444, "step": 157980 }, { "epoch": 2.77374953914219, "grad_norm": 0.05396448463268302, "learning_rate": 8.684776120733076e-05, "loss": 0.8429, "step": 157990 }, { "epoch": 2.7739251040221915, "grad_norm": 0.04641172818122232, "learning_rate": 8.683673211353079e-05, "loss": 0.8538, "step": 158000 }, { "epoch": 2.774100668902193, "grad_norm": 0.04572721519495913, "learning_rate": 8.682570319918211e-05, "loss": 0.841, "step": 158010 }, { "epoch": 2.774276233782194, "grad_norm": 0.07184679260116335, "learning_rate": 8.681467446442452e-05, "loss": 0.8437, "step": 158020 }, { "epoch": 2.7744517986621955, "grad_norm": 0.049338966478491036, "learning_rate": 8.680364590939772e-05, "loss": 0.8438, "step": 158030 }, { "epoch": 2.774627363542197, "grad_norm": 0.0572081364549409, "learning_rate": 8.679261753424145e-05, "loss": 0.8456, "step": 158040 }, { "epoch": 2.7748029284221984, "grad_norm": 0.04951340304904494, "learning_rate": 8.678158933909541e-05, "loss": 0.8462, "step": 158050 }, { "epoch": 2.7749784933022, "grad_norm": 0.05335713292404227, "learning_rate": 8.67705613240994e-05, "loss": 0.8401, "step": 158060 }, { "epoch": 2.775154058182201, "grad_norm": 0.07799416115840617, "learning_rate": 8.675953348939308e-05, "loss": 0.8363, "step": 158070 }, { "epoch": 2.775329623062203, "grad_norm": 0.05062751986449493, "learning_rate": 8.674850583511617e-05, "loss": 0.8399, "step": 158080 }, { "epoch": 2.775505187942204, "grad_norm": 0.04947107079062952, "learning_rate": 8.67374783614085e-05, "loss": 0.8391, "step": 158090 }, { "epoch": 2.7756807528222054, "grad_norm": 0.06667438723927128, "learning_rate": 8.672645106840967e-05, "loss": 0.8386, "step": 158100 }, { "epoch": 2.775856317702207, "grad_norm": 0.06208108880711767, "learning_rate": 8.671542395625941e-05, "loss": 0.8459, "step": 158110 }, { "epoch": 2.7760318825822083, "grad_norm": 0.048090918426966334, "learning_rate": 8.670439702509749e-05, "loss": 0.8495, "step": 158120 }, { "epoch": 2.77620744746221, "grad_norm": 0.07152244259270604, "learning_rate": 8.669337027506363e-05, "loss": 0.8371, "step": 158130 }, { "epoch": 2.776383012342211, "grad_norm": 0.06166699841861759, "learning_rate": 8.668234370629748e-05, "loss": 0.847, "step": 158140 }, { "epoch": 2.7765585772222123, "grad_norm": 0.04878371050190883, "learning_rate": 8.667131731893877e-05, "loss": 0.8383, "step": 158150 }, { "epoch": 2.776734142102214, "grad_norm": 0.06049321938653896, "learning_rate": 8.666029111312724e-05, "loss": 0.8447, "step": 158160 }, { "epoch": 2.7769097069822153, "grad_norm": 0.04687975523956496, "learning_rate": 8.664926508900258e-05, "loss": 0.8453, "step": 158170 }, { "epoch": 2.7770852718622168, "grad_norm": 0.044803268807973244, "learning_rate": 8.663823924670442e-05, "loss": 0.8432, "step": 158180 }, { "epoch": 2.7772608367422182, "grad_norm": 0.060195486639835205, "learning_rate": 8.662721358637257e-05, "loss": 0.8502, "step": 158190 }, { "epoch": 2.7774364016222197, "grad_norm": 0.04596803590194419, "learning_rate": 8.661618810814668e-05, "loss": 0.8445, "step": 158200 }, { "epoch": 2.7776119665022208, "grad_norm": 0.06729081329942413, "learning_rate": 8.660516281216645e-05, "loss": 0.8378, "step": 158210 }, { "epoch": 2.7777875313822222, "grad_norm": 0.04696980261365667, "learning_rate": 8.659413769857153e-05, "loss": 0.8496, "step": 158220 }, { "epoch": 2.7779630962622237, "grad_norm": 0.05131835651328704, "learning_rate": 8.658311276750167e-05, "loss": 0.8442, "step": 158230 }, { "epoch": 2.778138661142225, "grad_norm": 0.058271109449735604, "learning_rate": 8.657208801909654e-05, "loss": 0.8388, "step": 158240 }, { "epoch": 2.7783142260222267, "grad_norm": 0.04635449966942686, "learning_rate": 8.656106345349577e-05, "loss": 0.8386, "step": 158250 }, { "epoch": 2.7784897909022277, "grad_norm": 0.056211429093765475, "learning_rate": 8.655003907083918e-05, "loss": 0.8469, "step": 158260 }, { "epoch": 2.778665355782229, "grad_norm": 0.04893357825554613, "learning_rate": 8.653901487126632e-05, "loss": 0.8465, "step": 158270 }, { "epoch": 2.7788409206622307, "grad_norm": 0.05198798882084262, "learning_rate": 8.652799085491692e-05, "loss": 0.8496, "step": 158280 }, { "epoch": 2.779016485542232, "grad_norm": 0.06326205860011798, "learning_rate": 8.65169670219306e-05, "loss": 0.8486, "step": 158290 }, { "epoch": 2.7791920504222336, "grad_norm": 0.11329436860621157, "learning_rate": 8.650594337244713e-05, "loss": 0.8427, "step": 158300 }, { "epoch": 2.779367615302235, "grad_norm": 0.043525768791299324, "learning_rate": 8.649491990660615e-05, "loss": 0.8361, "step": 158310 }, { "epoch": 2.7795431801822366, "grad_norm": 0.06493938779800938, "learning_rate": 8.648389662454728e-05, "loss": 0.8521, "step": 158320 }, { "epoch": 2.7797187450622376, "grad_norm": 0.06690131278318882, "learning_rate": 8.647287352641023e-05, "loss": 0.8448, "step": 158330 }, { "epoch": 2.779894309942239, "grad_norm": 0.0525095478718427, "learning_rate": 8.646185061233469e-05, "loss": 0.844, "step": 158340 }, { "epoch": 2.7800698748222405, "grad_norm": 0.0639953191584054, "learning_rate": 8.645082788246022e-05, "loss": 0.8403, "step": 158350 }, { "epoch": 2.780245439702242, "grad_norm": 0.054570928249641645, "learning_rate": 8.64398053369266e-05, "loss": 0.8453, "step": 158360 }, { "epoch": 2.7804210045822435, "grad_norm": 0.048841201085192125, "learning_rate": 8.642878297587343e-05, "loss": 0.8419, "step": 158370 }, { "epoch": 2.7805965694622445, "grad_norm": 0.10443432113410675, "learning_rate": 8.641776079944038e-05, "loss": 0.8407, "step": 158380 }, { "epoch": 2.780772134342246, "grad_norm": 0.05549723210141879, "learning_rate": 8.640673880776708e-05, "loss": 0.8478, "step": 158390 }, { "epoch": 2.7809476992222475, "grad_norm": 0.05519627587518345, "learning_rate": 8.639571700099321e-05, "loss": 0.8459, "step": 158400 }, { "epoch": 2.781123264102249, "grad_norm": 0.05143219693949454, "learning_rate": 8.63846953792584e-05, "loss": 0.8367, "step": 158410 }, { "epoch": 2.7812988289822504, "grad_norm": 0.0449010734983184, "learning_rate": 8.637367394270226e-05, "loss": 0.8395, "step": 158420 }, { "epoch": 2.781474393862252, "grad_norm": 0.0477941032902872, "learning_rate": 8.636265269146454e-05, "loss": 0.8408, "step": 158430 }, { "epoch": 2.7816499587422534, "grad_norm": 0.05579431535867657, "learning_rate": 8.635163162568478e-05, "loss": 0.8456, "step": 158440 }, { "epoch": 2.7818255236222544, "grad_norm": 0.05455066420252598, "learning_rate": 8.634061074550268e-05, "loss": 0.8365, "step": 158450 }, { "epoch": 2.782001088502256, "grad_norm": 0.04363808512331947, "learning_rate": 8.63295900510578e-05, "loss": 0.8501, "step": 158460 }, { "epoch": 2.7821766533822574, "grad_norm": 0.05616773680683007, "learning_rate": 8.631856954248985e-05, "loss": 0.8409, "step": 158470 }, { "epoch": 2.782352218262259, "grad_norm": 0.06686073303482128, "learning_rate": 8.630754921993846e-05, "loss": 0.8372, "step": 158480 }, { "epoch": 2.7825277831422603, "grad_norm": 0.05069042331209733, "learning_rate": 8.629652908354321e-05, "loss": 0.8375, "step": 158490 }, { "epoch": 2.7827033480222614, "grad_norm": 0.057597028543256976, "learning_rate": 8.628550913344378e-05, "loss": 0.8405, "step": 158500 }, { "epoch": 2.7828789129022633, "grad_norm": 0.07186710768314743, "learning_rate": 8.627448936977976e-05, "loss": 0.8423, "step": 158510 }, { "epoch": 2.7830544777822643, "grad_norm": 0.047845336660489984, "learning_rate": 8.626346979269075e-05, "loss": 0.8371, "step": 158520 }, { "epoch": 2.783230042662266, "grad_norm": 0.05201955868370502, "learning_rate": 8.625245040231644e-05, "loss": 0.8405, "step": 158530 }, { "epoch": 2.7834056075422673, "grad_norm": 0.044581402378895144, "learning_rate": 8.624143119879641e-05, "loss": 0.8416, "step": 158540 }, { "epoch": 2.7835811724222688, "grad_norm": 0.06382787286265847, "learning_rate": 8.623041218227027e-05, "loss": 0.8419, "step": 158550 }, { "epoch": 2.7837567373022702, "grad_norm": 0.0488743912975959, "learning_rate": 8.621939335287762e-05, "loss": 0.8362, "step": 158560 }, { "epoch": 2.7839323021822713, "grad_norm": 0.05093683996168448, "learning_rate": 8.620837471075811e-05, "loss": 0.8532, "step": 158570 }, { "epoch": 2.7841078670622728, "grad_norm": 0.06218276900639767, "learning_rate": 8.619735625605133e-05, "loss": 0.8448, "step": 158580 }, { "epoch": 2.7842834319422742, "grad_norm": 0.04854696670169906, "learning_rate": 8.618633798889683e-05, "loss": 0.8472, "step": 158590 }, { "epoch": 2.7844589968222757, "grad_norm": 0.04605396199715163, "learning_rate": 8.617531990943433e-05, "loss": 0.8416, "step": 158600 }, { "epoch": 2.784634561702277, "grad_norm": 0.07836526005636074, "learning_rate": 8.616430201780337e-05, "loss": 0.8432, "step": 158610 }, { "epoch": 2.7848101265822782, "grad_norm": 0.05268586318815114, "learning_rate": 8.615328431414352e-05, "loss": 0.8424, "step": 158620 }, { "epoch": 2.78498569146228, "grad_norm": 0.06865943463411125, "learning_rate": 8.614226679859438e-05, "loss": 0.8417, "step": 158630 }, { "epoch": 2.785161256342281, "grad_norm": 0.04725660904191454, "learning_rate": 8.613124947129561e-05, "loss": 0.8419, "step": 158640 }, { "epoch": 2.7853368212222827, "grad_norm": 0.05352157562236789, "learning_rate": 8.612023233238674e-05, "loss": 0.84, "step": 158650 }, { "epoch": 2.785512386102284, "grad_norm": 0.055172886636973166, "learning_rate": 8.610921538200737e-05, "loss": 0.8375, "step": 158660 }, { "epoch": 2.7856879509822856, "grad_norm": 0.04920089970144094, "learning_rate": 8.60981986202971e-05, "loss": 0.8471, "step": 158670 }, { "epoch": 2.785863515862287, "grad_norm": 0.06024649074027634, "learning_rate": 8.608718204739554e-05, "loss": 0.8404, "step": 158680 }, { "epoch": 2.786039080742288, "grad_norm": 0.06036381666778544, "learning_rate": 8.607616566344217e-05, "loss": 0.8472, "step": 158690 }, { "epoch": 2.7862146456222896, "grad_norm": 0.05807791863694737, "learning_rate": 8.606514946857671e-05, "loss": 0.8496, "step": 158700 }, { "epoch": 2.786390210502291, "grad_norm": 0.051859188052313925, "learning_rate": 8.605413346293864e-05, "loss": 0.8422, "step": 158710 }, { "epoch": 2.7865657753822926, "grad_norm": 0.05603217176067365, "learning_rate": 8.604311764666758e-05, "loss": 0.8451, "step": 158720 }, { "epoch": 2.786741340262294, "grad_norm": 0.04973074231568203, "learning_rate": 8.603210201990306e-05, "loss": 0.8426, "step": 158730 }, { "epoch": 2.786916905142295, "grad_norm": 0.06104351631040734, "learning_rate": 8.60210865827847e-05, "loss": 0.8436, "step": 158740 }, { "epoch": 2.787092470022297, "grad_norm": 0.05970773404756986, "learning_rate": 8.601007133545204e-05, "loss": 0.8494, "step": 158750 }, { "epoch": 2.787268034902298, "grad_norm": 0.05296430330061749, "learning_rate": 8.599905627804462e-05, "loss": 0.8463, "step": 158760 }, { "epoch": 2.7874435997822995, "grad_norm": 0.059176164419269456, "learning_rate": 8.598804141070207e-05, "loss": 0.8431, "step": 158770 }, { "epoch": 2.787619164662301, "grad_norm": 0.06591104176025231, "learning_rate": 8.597702673356391e-05, "loss": 0.8388, "step": 158780 }, { "epoch": 2.7877947295423025, "grad_norm": 0.04986061205472818, "learning_rate": 8.596601224676972e-05, "loss": 0.8426, "step": 158790 }, { "epoch": 2.787970294422304, "grad_norm": 0.06610617950020466, "learning_rate": 8.595499795045896e-05, "loss": 0.8395, "step": 158800 }, { "epoch": 2.788145859302305, "grad_norm": 0.05314232025389221, "learning_rate": 8.594398384477132e-05, "loss": 0.8478, "step": 158810 }, { "epoch": 2.7883214241823064, "grad_norm": 0.04653120537964994, "learning_rate": 8.593296992984627e-05, "loss": 0.8349, "step": 158820 }, { "epoch": 2.788496989062308, "grad_norm": 0.05706595515811312, "learning_rate": 8.592195620582336e-05, "loss": 0.8446, "step": 158830 }, { "epoch": 2.7886725539423094, "grad_norm": 0.05553761594945306, "learning_rate": 8.591094267284219e-05, "loss": 0.8357, "step": 158840 }, { "epoch": 2.788848118822311, "grad_norm": 0.05860022512185123, "learning_rate": 8.589992933104227e-05, "loss": 0.8394, "step": 158850 }, { "epoch": 2.7890236837023124, "grad_norm": 0.049281395507239485, "learning_rate": 8.58889161805631e-05, "loss": 0.8442, "step": 158860 }, { "epoch": 2.789199248582314, "grad_norm": 0.04880175818854498, "learning_rate": 8.587790322154429e-05, "loss": 0.8386, "step": 158870 }, { "epoch": 2.789374813462315, "grad_norm": 0.06105512038976036, "learning_rate": 8.586689045412533e-05, "loss": 0.8419, "step": 158880 }, { "epoch": 2.7895503783423163, "grad_norm": 0.055084473067207415, "learning_rate": 8.585587787844578e-05, "loss": 0.8394, "step": 158890 }, { "epoch": 2.789725943222318, "grad_norm": 0.06160682739639107, "learning_rate": 8.584486549464515e-05, "loss": 0.8409, "step": 158900 }, { "epoch": 2.7899015081023193, "grad_norm": 0.05128942998158076, "learning_rate": 8.583385330286298e-05, "loss": 0.8395, "step": 158910 }, { "epoch": 2.7900770729823208, "grad_norm": 0.07787642794463664, "learning_rate": 8.582284130323882e-05, "loss": 0.8419, "step": 158920 }, { "epoch": 2.790252637862322, "grad_norm": 0.05207960146624838, "learning_rate": 8.581182949591211e-05, "loss": 0.8468, "step": 158930 }, { "epoch": 2.7904282027423233, "grad_norm": 0.05407801650758095, "learning_rate": 8.580081788102251e-05, "loss": 0.8426, "step": 158940 }, { "epoch": 2.7906037676223248, "grad_norm": 0.05350693271373002, "learning_rate": 8.578980645870942e-05, "loss": 0.8363, "step": 158950 }, { "epoch": 2.7907793325023262, "grad_norm": 0.0521973306336239, "learning_rate": 8.577879522911241e-05, "loss": 0.8475, "step": 158960 }, { "epoch": 2.7909548973823277, "grad_norm": 0.050346091585339016, "learning_rate": 8.576778419237093e-05, "loss": 0.8485, "step": 158970 }, { "epoch": 2.791130462262329, "grad_norm": 0.05300812578281723, "learning_rate": 8.57567733486246e-05, "loss": 0.8409, "step": 158980 }, { "epoch": 2.7913060271423307, "grad_norm": 0.05532956063875565, "learning_rate": 8.574576269801286e-05, "loss": 0.84, "step": 158990 }, { "epoch": 2.7914815920223317, "grad_norm": 0.052333398373000356, "learning_rate": 8.57347522406752e-05, "loss": 0.8435, "step": 159000 }, { "epoch": 2.791657156902333, "grad_norm": 0.042597584738477426, "learning_rate": 8.572374197675119e-05, "loss": 0.8383, "step": 159010 }, { "epoch": 2.7918327217823347, "grad_norm": 0.044470674889723394, "learning_rate": 8.57127319063803e-05, "loss": 0.8425, "step": 159020 }, { "epoch": 2.792008286662336, "grad_norm": 0.06084240265873605, "learning_rate": 8.570172202970198e-05, "loss": 0.837, "step": 159030 }, { "epoch": 2.7921838515423376, "grad_norm": 0.05085402855286349, "learning_rate": 8.569071234685582e-05, "loss": 0.8427, "step": 159040 }, { "epoch": 2.7923594164223386, "grad_norm": 0.05624618692124495, "learning_rate": 8.567970285798125e-05, "loss": 0.8474, "step": 159050 }, { "epoch": 2.79253498130234, "grad_norm": 0.05093056436742468, "learning_rate": 8.566869356321781e-05, "loss": 0.8442, "step": 159060 }, { "epoch": 2.7927105461823416, "grad_norm": 0.05958375153675825, "learning_rate": 8.565768446270491e-05, "loss": 0.8417, "step": 159070 }, { "epoch": 2.792886111062343, "grad_norm": 0.06138728443283651, "learning_rate": 8.564667555658214e-05, "loss": 0.8466, "step": 159080 }, { "epoch": 2.7930616759423446, "grad_norm": 0.0554213553814544, "learning_rate": 8.56356668449889e-05, "loss": 0.8375, "step": 159090 }, { "epoch": 2.793237240822346, "grad_norm": 0.05128291532401258, "learning_rate": 8.562465832806469e-05, "loss": 0.8467, "step": 159100 }, { "epoch": 2.7934128057023475, "grad_norm": 0.06604643071122562, "learning_rate": 8.561365000594906e-05, "loss": 0.8363, "step": 159110 }, { "epoch": 2.7935883705823485, "grad_norm": 0.051331133465648916, "learning_rate": 8.560264187878141e-05, "loss": 0.8471, "step": 159120 }, { "epoch": 2.79376393546235, "grad_norm": 0.05278182983648806, "learning_rate": 8.559163394670125e-05, "loss": 0.8415, "step": 159130 }, { "epoch": 2.7939395003423515, "grad_norm": 0.08589667770226175, "learning_rate": 8.558062620984798e-05, "loss": 0.8382, "step": 159140 }, { "epoch": 2.794115065222353, "grad_norm": 0.04716796796982661, "learning_rate": 8.556961866836118e-05, "loss": 0.8409, "step": 159150 }, { "epoch": 2.7942906301023545, "grad_norm": 0.05081963981492891, "learning_rate": 8.555861132238026e-05, "loss": 0.8383, "step": 159160 }, { "epoch": 2.7944661949823555, "grad_norm": 0.05022615085480669, "learning_rate": 8.554760417204468e-05, "loss": 0.8449, "step": 159170 }, { "epoch": 2.7946417598623574, "grad_norm": 0.07193317320925817, "learning_rate": 8.553659721749394e-05, "loss": 0.8345, "step": 159180 }, { "epoch": 2.7948173247423584, "grad_norm": 0.051188041024636935, "learning_rate": 8.552559045886749e-05, "loss": 0.8309, "step": 159190 }, { "epoch": 2.79499288962236, "grad_norm": 0.05771250632597885, "learning_rate": 8.551458389630471e-05, "loss": 0.8429, "step": 159200 }, { "epoch": 2.7951684545023614, "grad_norm": 0.049492463621484434, "learning_rate": 8.550357752994517e-05, "loss": 0.8454, "step": 159210 }, { "epoch": 2.795344019382363, "grad_norm": 0.05395801458060741, "learning_rate": 8.549257135992827e-05, "loss": 0.8426, "step": 159220 }, { "epoch": 2.7955195842623644, "grad_norm": 0.05515268161196609, "learning_rate": 8.548156538639347e-05, "loss": 0.8428, "step": 159230 }, { "epoch": 2.7956951491423654, "grad_norm": 0.0717171683345448, "learning_rate": 8.54705596094802e-05, "loss": 0.8401, "step": 159240 }, { "epoch": 2.795870714022367, "grad_norm": 0.052781533002429, "learning_rate": 8.545955402932792e-05, "loss": 0.851, "step": 159250 }, { "epoch": 2.7960462789023683, "grad_norm": 0.0549593198434756, "learning_rate": 8.544854864607607e-05, "loss": 0.8407, "step": 159260 }, { "epoch": 2.79622184378237, "grad_norm": 0.05355986374197193, "learning_rate": 8.543754345986405e-05, "loss": 0.8455, "step": 159270 }, { "epoch": 2.7963974086623713, "grad_norm": 0.04388339799737148, "learning_rate": 8.542653847083139e-05, "loss": 0.8397, "step": 159280 }, { "epoch": 2.7965729735423723, "grad_norm": 0.053226627880271445, "learning_rate": 8.541553367911748e-05, "loss": 0.8494, "step": 159290 }, { "epoch": 2.7967485384223743, "grad_norm": 0.0494812997689779, "learning_rate": 8.540452908486172e-05, "loss": 0.8472, "step": 159300 }, { "epoch": 2.7969241033023753, "grad_norm": 0.07640198276218552, "learning_rate": 8.539352468820352e-05, "loss": 0.8372, "step": 159310 }, { "epoch": 2.7970996681823768, "grad_norm": 0.04713261500811805, "learning_rate": 8.53825204892824e-05, "loss": 0.8469, "step": 159320 }, { "epoch": 2.7972752330623782, "grad_norm": 0.07569580350041524, "learning_rate": 8.537151648823775e-05, "loss": 0.8392, "step": 159330 }, { "epoch": 2.7974507979423797, "grad_norm": 0.05107531632923652, "learning_rate": 8.536051268520895e-05, "loss": 0.8496, "step": 159340 }, { "epoch": 2.797626362822381, "grad_norm": 0.04918029626961618, "learning_rate": 8.534950908033548e-05, "loss": 0.8413, "step": 159350 }, { "epoch": 2.7978019277023822, "grad_norm": 0.05055961600921949, "learning_rate": 8.533850567375672e-05, "loss": 0.8463, "step": 159360 }, { "epoch": 2.7979774925823837, "grad_norm": 0.05119570020531404, "learning_rate": 8.532750246561206e-05, "loss": 0.8483, "step": 159370 }, { "epoch": 2.798153057462385, "grad_norm": 0.056599074651058304, "learning_rate": 8.531649945604099e-05, "loss": 0.8493, "step": 159380 }, { "epoch": 2.7983286223423867, "grad_norm": 0.050220836376803406, "learning_rate": 8.530549664518288e-05, "loss": 0.8315, "step": 159390 }, { "epoch": 2.798504187222388, "grad_norm": 0.06863333639428916, "learning_rate": 8.529449403317713e-05, "loss": 0.8414, "step": 159400 }, { "epoch": 2.798679752102389, "grad_norm": 0.0952199321845148, "learning_rate": 8.528349162016312e-05, "loss": 0.8438, "step": 159410 }, { "epoch": 2.798855316982391, "grad_norm": 0.05864172388304522, "learning_rate": 8.527248940628032e-05, "loss": 0.8427, "step": 159420 }, { "epoch": 2.799030881862392, "grad_norm": 0.07012663186760426, "learning_rate": 8.526148739166808e-05, "loss": 0.8494, "step": 159430 }, { "epoch": 2.7992064467423936, "grad_norm": 0.04946192938762748, "learning_rate": 8.525048557646578e-05, "loss": 0.8362, "step": 159440 }, { "epoch": 2.799382011622395, "grad_norm": 0.04955313043542959, "learning_rate": 8.52394839608129e-05, "loss": 0.8437, "step": 159450 }, { "epoch": 2.7995575765023966, "grad_norm": 0.05789980965811333, "learning_rate": 8.522848254484877e-05, "loss": 0.8431, "step": 159460 }, { "epoch": 2.799733141382398, "grad_norm": 0.05677707531491286, "learning_rate": 8.521748132871278e-05, "loss": 0.84, "step": 159470 }, { "epoch": 2.799908706262399, "grad_norm": 0.05874476914172145, "learning_rate": 8.520648031254428e-05, "loss": 0.8476, "step": 159480 }, { "epoch": 2.8000842711424005, "grad_norm": 0.05211770171996852, "learning_rate": 8.519547949648276e-05, "loss": 0.8424, "step": 159490 }, { "epoch": 2.800259836022402, "grad_norm": 0.0472693846047477, "learning_rate": 8.518447888066752e-05, "loss": 0.8394, "step": 159500 }, { "epoch": 2.8004354009024035, "grad_norm": 0.05930359456210888, "learning_rate": 8.517347846523795e-05, "loss": 0.8474, "step": 159510 }, { "epoch": 2.800610965782405, "grad_norm": 0.0661985779369035, "learning_rate": 8.516247825033348e-05, "loss": 0.8503, "step": 159520 }, { "epoch": 2.800786530662406, "grad_norm": 0.07132274988183425, "learning_rate": 8.515147823609342e-05, "loss": 0.8428, "step": 159530 }, { "epoch": 2.800962095542408, "grad_norm": 0.07163047668333795, "learning_rate": 8.514047842265714e-05, "loss": 0.8431, "step": 159540 }, { "epoch": 2.801137660422409, "grad_norm": 0.056944741211752484, "learning_rate": 8.512947881016407e-05, "loss": 0.8416, "step": 159550 }, { "epoch": 2.8013132253024104, "grad_norm": 0.048927663520118035, "learning_rate": 8.511847939875357e-05, "loss": 0.8505, "step": 159560 }, { "epoch": 2.801488790182412, "grad_norm": 0.05575729156511879, "learning_rate": 8.510748018856496e-05, "loss": 0.8391, "step": 159570 }, { "epoch": 2.8016643550624134, "grad_norm": 0.06199330793792168, "learning_rate": 8.50964811797376e-05, "loss": 0.841, "step": 159580 }, { "epoch": 2.801839919942415, "grad_norm": 0.05684838541316369, "learning_rate": 8.508548237241091e-05, "loss": 0.849, "step": 159590 }, { "epoch": 2.802015484822416, "grad_norm": 0.08586333865159287, "learning_rate": 8.50744837667242e-05, "loss": 0.8395, "step": 159600 }, { "epoch": 2.8021910497024174, "grad_norm": 0.057259831093895035, "learning_rate": 8.506348536281677e-05, "loss": 0.8456, "step": 159610 }, { "epoch": 2.802366614582419, "grad_norm": 0.1657633234467731, "learning_rate": 8.505248716082811e-05, "loss": 0.8301, "step": 159620 }, { "epoch": 2.8025421794624203, "grad_norm": 0.048835635706618696, "learning_rate": 8.50414891608975e-05, "loss": 0.8489, "step": 159630 }, { "epoch": 2.802717744342422, "grad_norm": 0.05605573010585747, "learning_rate": 8.503049136316425e-05, "loss": 0.8451, "step": 159640 }, { "epoch": 2.8028933092224233, "grad_norm": 0.04641556803912904, "learning_rate": 8.50194937677677e-05, "loss": 0.8347, "step": 159650 }, { "epoch": 2.803068874102425, "grad_norm": 0.04618997714988377, "learning_rate": 8.500849637484727e-05, "loss": 0.8313, "step": 159660 }, { "epoch": 2.803244438982426, "grad_norm": 0.05050587143416418, "learning_rate": 8.499749918454225e-05, "loss": 0.8467, "step": 159670 }, { "epoch": 2.8034200038624273, "grad_norm": 0.06033682875467223, "learning_rate": 8.498650219699197e-05, "loss": 0.8477, "step": 159680 }, { "epoch": 2.8035955687424288, "grad_norm": 0.05144665609336715, "learning_rate": 8.497550541233578e-05, "loss": 0.8526, "step": 159690 }, { "epoch": 2.8037711336224302, "grad_norm": 0.07491240359445099, "learning_rate": 8.496450883071302e-05, "loss": 0.8469, "step": 159700 }, { "epoch": 2.8039466985024317, "grad_norm": 0.07489346537225301, "learning_rate": 8.495351245226297e-05, "loss": 0.8449, "step": 159710 }, { "epoch": 2.8041222633824328, "grad_norm": 0.058331713247399795, "learning_rate": 8.494251627712503e-05, "loss": 0.8526, "step": 159720 }, { "epoch": 2.8042978282624342, "grad_norm": 0.06254350588900517, "learning_rate": 8.493152030543848e-05, "loss": 0.8465, "step": 159730 }, { "epoch": 2.8044733931424357, "grad_norm": 0.05309013627845153, "learning_rate": 8.492052453734266e-05, "loss": 0.8423, "step": 159740 }, { "epoch": 2.804648958022437, "grad_norm": 0.042902265312203355, "learning_rate": 8.490952897297685e-05, "loss": 0.8387, "step": 159750 }, { "epoch": 2.8048245229024387, "grad_norm": 0.04501293243603132, "learning_rate": 8.48985336124804e-05, "loss": 0.8434, "step": 159760 }, { "epoch": 2.80500008778244, "grad_norm": 0.05562700824700469, "learning_rate": 8.488753845599263e-05, "loss": 0.8427, "step": 159770 }, { "epoch": 2.8051756526624416, "grad_norm": 0.04717435511897762, "learning_rate": 8.48765435036528e-05, "loss": 0.852, "step": 159780 }, { "epoch": 2.8053512175424427, "grad_norm": 0.07038818041343443, "learning_rate": 8.486554875560031e-05, "loss": 0.8436, "step": 159790 }, { "epoch": 2.805526782422444, "grad_norm": 0.06692863432221277, "learning_rate": 8.485455421197438e-05, "loss": 0.8381, "step": 159800 }, { "epoch": 2.8057023473024456, "grad_norm": 0.05204373705291223, "learning_rate": 8.484355987291435e-05, "loss": 0.8425, "step": 159810 }, { "epoch": 2.805877912182447, "grad_norm": 0.04236628705575725, "learning_rate": 8.483256573855946e-05, "loss": 0.8467, "step": 159820 }, { "epoch": 2.8060534770624486, "grad_norm": 0.06488416632436568, "learning_rate": 8.48215718090491e-05, "loss": 0.8507, "step": 159830 }, { "epoch": 2.8062290419424496, "grad_norm": 0.05642276851051412, "learning_rate": 8.481057808452254e-05, "loss": 0.8467, "step": 159840 }, { "epoch": 2.806404606822451, "grad_norm": 0.04980231602289356, "learning_rate": 8.479958456511903e-05, "loss": 0.8353, "step": 159850 }, { "epoch": 2.8065801717024526, "grad_norm": 0.09598246136587105, "learning_rate": 8.478859125097792e-05, "loss": 0.8384, "step": 159860 }, { "epoch": 2.806755736582454, "grad_norm": 0.05564655975606287, "learning_rate": 8.477759814223846e-05, "loss": 0.8269, "step": 159870 }, { "epoch": 2.8069313014624555, "grad_norm": 0.061936184111871566, "learning_rate": 8.47666052390399e-05, "loss": 0.8447, "step": 159880 }, { "epoch": 2.807106866342457, "grad_norm": 0.05362869037056806, "learning_rate": 8.47556125415216e-05, "loss": 0.841, "step": 159890 }, { "epoch": 2.8072824312224585, "grad_norm": 0.04205825136860857, "learning_rate": 8.474462004982279e-05, "loss": 0.8373, "step": 159900 }, { "epoch": 2.8074579961024595, "grad_norm": 0.0539424428029717, "learning_rate": 8.473362776408279e-05, "loss": 0.8445, "step": 159910 }, { "epoch": 2.807633560982461, "grad_norm": 0.06256823357144665, "learning_rate": 8.47226356844408e-05, "loss": 0.8436, "step": 159920 }, { "epoch": 2.8078091258624625, "grad_norm": 0.053094663794082224, "learning_rate": 8.471164381103616e-05, "loss": 0.8448, "step": 159930 }, { "epoch": 2.807984690742464, "grad_norm": 0.052275358924130134, "learning_rate": 8.470065214400812e-05, "loss": 0.836, "step": 159940 }, { "epoch": 2.8081602556224654, "grad_norm": 0.055402022163586015, "learning_rate": 8.468966068349588e-05, "loss": 0.856, "step": 159950 }, { "epoch": 2.8083358205024664, "grad_norm": 0.056982509726870884, "learning_rate": 8.467866942963886e-05, "loss": 0.8425, "step": 159960 }, { "epoch": 2.8085113853824684, "grad_norm": 0.06113078799450122, "learning_rate": 8.466767838257618e-05, "loss": 0.8403, "step": 159970 }, { "epoch": 2.8086869502624694, "grad_norm": 0.0559447521449295, "learning_rate": 8.465668754244715e-05, "loss": 0.8428, "step": 159980 }, { "epoch": 2.808862515142471, "grad_norm": 0.062118545851345595, "learning_rate": 8.4645696909391e-05, "loss": 0.8382, "step": 159990 }, { "epoch": 2.8090380800224724, "grad_norm": 0.04044311778708643, "learning_rate": 8.463470648354703e-05, "loss": 0.8439, "step": 160000 }, { "epoch": 2.809213644902474, "grad_norm": 0.06728042689674879, "learning_rate": 8.462371626505446e-05, "loss": 0.8409, "step": 160010 }, { "epoch": 2.8093892097824753, "grad_norm": 0.06076037449401969, "learning_rate": 8.461272625405252e-05, "loss": 0.8518, "step": 160020 }, { "epoch": 2.8095647746624763, "grad_norm": 0.0556081199606742, "learning_rate": 8.460173645068051e-05, "loss": 0.8402, "step": 160030 }, { "epoch": 2.809740339542478, "grad_norm": 0.056883802526134015, "learning_rate": 8.459074685507765e-05, "loss": 0.8416, "step": 160040 }, { "epoch": 2.8099159044224793, "grad_norm": 0.08659864001329541, "learning_rate": 8.457975746738312e-05, "loss": 0.8391, "step": 160050 }, { "epoch": 2.8100914693024808, "grad_norm": 0.04711294224348655, "learning_rate": 8.456876828773624e-05, "loss": 0.8427, "step": 160060 }, { "epoch": 2.8102670341824822, "grad_norm": 0.04987986496807564, "learning_rate": 8.455777931627622e-05, "loss": 0.8435, "step": 160070 }, { "epoch": 2.8104425990624833, "grad_norm": 0.06643118471009697, "learning_rate": 8.454679055314229e-05, "loss": 0.8443, "step": 160080 }, { "epoch": 2.810618163942485, "grad_norm": 0.06026034238001293, "learning_rate": 8.453580199847366e-05, "loss": 0.8421, "step": 160090 }, { "epoch": 2.8107937288224862, "grad_norm": 0.05086473232537924, "learning_rate": 8.45248136524096e-05, "loss": 0.8497, "step": 160100 }, { "epoch": 2.8109692937024877, "grad_norm": 0.04317276033245278, "learning_rate": 8.45138255150893e-05, "loss": 0.8492, "step": 160110 }, { "epoch": 2.811144858582489, "grad_norm": 0.062194531812848244, "learning_rate": 8.450283758665194e-05, "loss": 0.8328, "step": 160120 }, { "epoch": 2.8113204234624907, "grad_norm": 0.049008940008715654, "learning_rate": 8.449184986723689e-05, "loss": 0.8327, "step": 160130 }, { "epoch": 2.811495988342492, "grad_norm": 0.05845368572181331, "learning_rate": 8.448086235698322e-05, "loss": 0.8464, "step": 160140 }, { "epoch": 2.811671553222493, "grad_norm": 0.046765418424337374, "learning_rate": 8.446987505603018e-05, "loss": 0.8509, "step": 160150 }, { "epoch": 2.8118471181024947, "grad_norm": 0.0525753534101565, "learning_rate": 8.445888796451695e-05, "loss": 0.8373, "step": 160160 }, { "epoch": 2.812022682982496, "grad_norm": 0.046712304923504346, "learning_rate": 8.444790108258283e-05, "loss": 0.8472, "step": 160170 }, { "epoch": 2.8121982478624976, "grad_norm": 0.05638136878664301, "learning_rate": 8.443691441036698e-05, "loss": 0.8474, "step": 160180 }, { "epoch": 2.812373812742499, "grad_norm": 0.0484868188051071, "learning_rate": 8.442592794800856e-05, "loss": 0.8451, "step": 160190 }, { "epoch": 2.8125493776225, "grad_norm": 0.0575641705197477, "learning_rate": 8.441494169564686e-05, "loss": 0.834, "step": 160200 }, { "epoch": 2.812724942502502, "grad_norm": 0.0745061800363056, "learning_rate": 8.440395565342099e-05, "loss": 0.8451, "step": 160210 }, { "epoch": 2.812900507382503, "grad_norm": 0.06997591147753346, "learning_rate": 8.439296982147016e-05, "loss": 0.8437, "step": 160220 }, { "epoch": 2.8130760722625046, "grad_norm": 0.04496761981868282, "learning_rate": 8.438198419993365e-05, "loss": 0.8355, "step": 160230 }, { "epoch": 2.813251637142506, "grad_norm": 0.041772632620062866, "learning_rate": 8.437099878895055e-05, "loss": 0.8443, "step": 160240 }, { "epoch": 2.8134272020225075, "grad_norm": 0.051967683876474394, "learning_rate": 8.436001358866009e-05, "loss": 0.8404, "step": 160250 }, { "epoch": 2.813602766902509, "grad_norm": 0.04373002151529893, "learning_rate": 8.434902859920143e-05, "loss": 0.846, "step": 160260 }, { "epoch": 2.81377833178251, "grad_norm": 0.059664531802383486, "learning_rate": 8.433804382071379e-05, "loss": 0.8422, "step": 160270 }, { "epoch": 2.8139538966625115, "grad_norm": 0.055966461313222846, "learning_rate": 8.432705925333634e-05, "loss": 0.841, "step": 160280 }, { "epoch": 2.814129461542513, "grad_norm": 0.11555489421721625, "learning_rate": 8.431607489720817e-05, "loss": 0.8306, "step": 160290 }, { "epoch": 2.8143050264225145, "grad_norm": 0.050466665874079795, "learning_rate": 8.430509075246862e-05, "loss": 0.84, "step": 160300 }, { "epoch": 2.814480591302516, "grad_norm": 0.053472563371376354, "learning_rate": 8.429410681925674e-05, "loss": 0.8426, "step": 160310 }, { "epoch": 2.8146561561825174, "grad_norm": 0.0570417131838305, "learning_rate": 8.428312309771173e-05, "loss": 0.8409, "step": 160320 }, { "epoch": 2.814831721062519, "grad_norm": 0.07247368502504879, "learning_rate": 8.427213958797271e-05, "loss": 0.8361, "step": 160330 }, { "epoch": 2.81500728594252, "grad_norm": 0.059485401650077596, "learning_rate": 8.426115629017893e-05, "loss": 0.8367, "step": 160340 }, { "epoch": 2.8151828508225214, "grad_norm": 0.04759677880549446, "learning_rate": 8.425017320446951e-05, "loss": 0.8443, "step": 160350 }, { "epoch": 2.815358415702523, "grad_norm": 0.0558809618167103, "learning_rate": 8.423919033098358e-05, "loss": 0.8419, "step": 160360 }, { "epoch": 2.8155339805825244, "grad_norm": 0.050405992100773664, "learning_rate": 8.422820766986035e-05, "loss": 0.8421, "step": 160370 }, { "epoch": 2.815709545462526, "grad_norm": 0.05105944068409853, "learning_rate": 8.421722522123894e-05, "loss": 0.8427, "step": 160380 }, { "epoch": 2.815885110342527, "grad_norm": 0.051126352641093366, "learning_rate": 8.420624298525846e-05, "loss": 0.8429, "step": 160390 }, { "epoch": 2.8160606752225283, "grad_norm": 0.06597960371656657, "learning_rate": 8.419526096205815e-05, "loss": 0.8428, "step": 160400 }, { "epoch": 2.81623624010253, "grad_norm": 0.05903723913568439, "learning_rate": 8.418427915177708e-05, "loss": 0.8458, "step": 160410 }, { "epoch": 2.8164118049825313, "grad_norm": 0.04878496246932129, "learning_rate": 8.417329755455444e-05, "loss": 0.8572, "step": 160420 }, { "epoch": 2.8165873698625328, "grad_norm": 0.05781724038648827, "learning_rate": 8.41623161705293e-05, "loss": 0.8342, "step": 160430 }, { "epoch": 2.8167629347425343, "grad_norm": 0.06317435376389822, "learning_rate": 8.415133499984088e-05, "loss": 0.8467, "step": 160440 }, { "epoch": 2.8169384996225357, "grad_norm": 0.04822215327793413, "learning_rate": 8.414035404262827e-05, "loss": 0.8415, "step": 160450 }, { "epoch": 2.8171140645025368, "grad_norm": 0.06179800779636427, "learning_rate": 8.412937329903057e-05, "loss": 0.8394, "step": 160460 }, { "epoch": 2.8172896293825382, "grad_norm": 0.06918561973666568, "learning_rate": 8.4118392769187e-05, "loss": 0.8354, "step": 160470 }, { "epoch": 2.8174651942625397, "grad_norm": 0.050660734829735814, "learning_rate": 8.41074124532366e-05, "loss": 0.8402, "step": 160480 }, { "epoch": 2.817640759142541, "grad_norm": 0.04159047899303103, "learning_rate": 8.409643235131854e-05, "loss": 0.8346, "step": 160490 }, { "epoch": 2.8178163240225427, "grad_norm": 0.05672225986691627, "learning_rate": 8.408545246357185e-05, "loss": 0.8403, "step": 160500 }, { "epoch": 2.8179918889025437, "grad_norm": 0.05574511987506748, "learning_rate": 8.40744727901358e-05, "loss": 0.8385, "step": 160510 }, { "epoch": 2.818167453782545, "grad_norm": 0.0688718831689664, "learning_rate": 8.406349333114941e-05, "loss": 0.8402, "step": 160520 }, { "epoch": 2.8183430186625467, "grad_norm": 0.045608467189816974, "learning_rate": 8.405251408675177e-05, "loss": 0.8379, "step": 160530 }, { "epoch": 2.818518583542548, "grad_norm": 0.05085523997679157, "learning_rate": 8.404153505708206e-05, "loss": 0.846, "step": 160540 }, { "epoch": 2.8186941484225496, "grad_norm": 0.05727174233175065, "learning_rate": 8.403055624227935e-05, "loss": 0.8405, "step": 160550 }, { "epoch": 2.818869713302551, "grad_norm": 0.05380111863542939, "learning_rate": 8.401957764248268e-05, "loss": 0.8464, "step": 160560 }, { "epoch": 2.8190452781825526, "grad_norm": 0.07387734321479636, "learning_rate": 8.400859925783129e-05, "loss": 0.8466, "step": 160570 }, { "epoch": 2.8192208430625536, "grad_norm": 0.05277215242188288, "learning_rate": 8.399762108846419e-05, "loss": 0.8431, "step": 160580 }, { "epoch": 2.819396407942555, "grad_norm": 0.06575437184227235, "learning_rate": 8.398664313452048e-05, "loss": 0.844, "step": 160590 }, { "epoch": 2.8195719728225566, "grad_norm": 0.0647803792997902, "learning_rate": 8.397566539613925e-05, "loss": 0.8385, "step": 160600 }, { "epoch": 2.819747537702558, "grad_norm": 0.07371792463609031, "learning_rate": 8.396468787345962e-05, "loss": 0.84, "step": 160610 }, { "epoch": 2.8199231025825595, "grad_norm": 0.06459258822254475, "learning_rate": 8.395371056662065e-05, "loss": 0.8506, "step": 160620 }, { "epoch": 2.8200986674625605, "grad_norm": 0.06244756732189531, "learning_rate": 8.394273347576141e-05, "loss": 0.8484, "step": 160630 }, { "epoch": 2.8202742323425625, "grad_norm": 0.04252185642260427, "learning_rate": 8.393175660102107e-05, "loss": 0.8362, "step": 160640 }, { "epoch": 2.8204497972225635, "grad_norm": 0.052231924657637645, "learning_rate": 8.392077994253862e-05, "loss": 0.8495, "step": 160650 }, { "epoch": 2.820625362102565, "grad_norm": 0.043214377114657705, "learning_rate": 8.390980350045317e-05, "loss": 0.8423, "step": 160660 }, { "epoch": 2.8208009269825665, "grad_norm": 0.05601303219764068, "learning_rate": 8.389882727490372e-05, "loss": 0.8435, "step": 160670 }, { "epoch": 2.820976491862568, "grad_norm": 0.06155243052001449, "learning_rate": 8.388785126602946e-05, "loss": 0.8485, "step": 160680 }, { "epoch": 2.8211520567425694, "grad_norm": 0.05992365579731497, "learning_rate": 8.38768754739694e-05, "loss": 0.8431, "step": 160690 }, { "epoch": 2.8213276216225704, "grad_norm": 0.06370609179867602, "learning_rate": 8.38658998988626e-05, "loss": 0.8362, "step": 160700 }, { "epoch": 2.821503186502572, "grad_norm": 0.056898203181916095, "learning_rate": 8.385492454084814e-05, "loss": 0.852, "step": 160710 }, { "epoch": 2.8216787513825734, "grad_norm": 0.059579429617853744, "learning_rate": 8.384394940006508e-05, "loss": 0.847, "step": 160720 }, { "epoch": 2.821854316262575, "grad_norm": 0.060412728889174795, "learning_rate": 8.383297447665241e-05, "loss": 0.8426, "step": 160730 }, { "epoch": 2.8220298811425764, "grad_norm": 0.05067039840667377, "learning_rate": 8.382199977074931e-05, "loss": 0.8401, "step": 160740 }, { "epoch": 2.8222054460225774, "grad_norm": 0.0481578999753639, "learning_rate": 8.381102528249475e-05, "loss": 0.8471, "step": 160750 }, { "epoch": 2.8223810109025793, "grad_norm": 0.05291318320412409, "learning_rate": 8.38000510120278e-05, "loss": 0.8459, "step": 160760 }, { "epoch": 2.8225565757825803, "grad_norm": 0.04984936321336054, "learning_rate": 8.378907695948748e-05, "loss": 0.8389, "step": 160770 }, { "epoch": 2.822732140662582, "grad_norm": 0.04695901713969476, "learning_rate": 8.377810312501287e-05, "loss": 0.8424, "step": 160780 }, { "epoch": 2.8229077055425833, "grad_norm": 0.06951030595853698, "learning_rate": 8.376712950874299e-05, "loss": 0.8402, "step": 160790 }, { "epoch": 2.823083270422585, "grad_norm": 0.07047893547175117, "learning_rate": 8.375615611081685e-05, "loss": 0.8535, "step": 160800 }, { "epoch": 2.8232588353025863, "grad_norm": 0.05202334726848883, "learning_rate": 8.374518293137357e-05, "loss": 0.8423, "step": 160810 }, { "epoch": 2.8234344001825873, "grad_norm": 0.056331920665803295, "learning_rate": 8.373420997055213e-05, "loss": 0.8404, "step": 160820 }, { "epoch": 2.8236099650625888, "grad_norm": 0.05494773620617497, "learning_rate": 8.372323722849154e-05, "loss": 0.8439, "step": 160830 }, { "epoch": 2.8237855299425902, "grad_norm": 0.049669524179407676, "learning_rate": 8.37122647053308e-05, "loss": 0.8445, "step": 160840 }, { "epoch": 2.8239610948225917, "grad_norm": 0.06594467178127901, "learning_rate": 8.370129240120902e-05, "loss": 0.8425, "step": 160850 }, { "epoch": 2.824136659702593, "grad_norm": 0.06672538171476199, "learning_rate": 8.369032031626519e-05, "loss": 0.8413, "step": 160860 }, { "epoch": 2.8243122245825942, "grad_norm": 0.04329497888010286, "learning_rate": 8.36793484506383e-05, "loss": 0.8496, "step": 160870 }, { "epoch": 2.824487789462596, "grad_norm": 0.0563431323062391, "learning_rate": 8.366837680446739e-05, "loss": 0.8421, "step": 160880 }, { "epoch": 2.824663354342597, "grad_norm": 0.06598508460957991, "learning_rate": 8.365740537789148e-05, "loss": 0.8457, "step": 160890 }, { "epoch": 2.8248389192225987, "grad_norm": 0.06338171391397843, "learning_rate": 8.364643417104953e-05, "loss": 0.843, "step": 160900 }, { "epoch": 2.8250144841026, "grad_norm": 0.053133144676679275, "learning_rate": 8.36354631840806e-05, "loss": 0.8532, "step": 160910 }, { "epoch": 2.8251900489826016, "grad_norm": 0.06638971560757652, "learning_rate": 8.362449241712372e-05, "loss": 0.8475, "step": 160920 }, { "epoch": 2.825365613862603, "grad_norm": 0.049766892012850415, "learning_rate": 8.361352187031783e-05, "loss": 0.8443, "step": 160930 }, { "epoch": 2.825541178742604, "grad_norm": 0.05312968151168612, "learning_rate": 8.360255154380193e-05, "loss": 0.8423, "step": 160940 }, { "epoch": 2.8257167436226056, "grad_norm": 0.050932177349733786, "learning_rate": 8.359158143771505e-05, "loss": 0.848, "step": 160950 }, { "epoch": 2.825892308502607, "grad_norm": 0.05383920867891556, "learning_rate": 8.358061155219618e-05, "loss": 0.8433, "step": 160960 }, { "epoch": 2.8260678733826086, "grad_norm": 0.04994403400387315, "learning_rate": 8.356964188738425e-05, "loss": 0.8465, "step": 160970 }, { "epoch": 2.82624343826261, "grad_norm": 0.0713052962373993, "learning_rate": 8.355867244341837e-05, "loss": 0.8341, "step": 160980 }, { "epoch": 2.826419003142611, "grad_norm": 0.046884375364308874, "learning_rate": 8.354770322043744e-05, "loss": 0.8499, "step": 160990 }, { "epoch": 2.826594568022613, "grad_norm": 0.0901140477719118, "learning_rate": 8.353673421858043e-05, "loss": 0.8422, "step": 161000 }, { "epoch": 2.826770132902614, "grad_norm": 0.0464544426222421, "learning_rate": 8.352576543798632e-05, "loss": 0.8485, "step": 161010 }, { "epoch": 2.8269456977826155, "grad_norm": 0.05938897461276247, "learning_rate": 8.351479687879414e-05, "loss": 0.8429, "step": 161020 }, { "epoch": 2.827121262662617, "grad_norm": 0.05233920933364172, "learning_rate": 8.350382854114284e-05, "loss": 0.8376, "step": 161030 }, { "epoch": 2.8272968275426185, "grad_norm": 0.058200493850367865, "learning_rate": 8.349286042517137e-05, "loss": 0.8475, "step": 161040 }, { "epoch": 2.82747239242262, "grad_norm": 0.05315796691585697, "learning_rate": 8.348189253101873e-05, "loss": 0.8484, "step": 161050 }, { "epoch": 2.827647957302621, "grad_norm": 0.05280917913489597, "learning_rate": 8.347092485882387e-05, "loss": 0.8432, "step": 161060 }, { "epoch": 2.8278235221826225, "grad_norm": 0.057598187507443895, "learning_rate": 8.34599574087257e-05, "loss": 0.8459, "step": 161070 }, { "epoch": 2.827999087062624, "grad_norm": 0.05708825997686406, "learning_rate": 8.344899018086329e-05, "loss": 0.8427, "step": 161080 }, { "epoch": 2.8281746519426254, "grad_norm": 0.05298927480658464, "learning_rate": 8.343802317537553e-05, "loss": 0.8415, "step": 161090 }, { "epoch": 2.828350216822627, "grad_norm": 0.05091246725504582, "learning_rate": 8.34270563924014e-05, "loss": 0.8506, "step": 161100 }, { "epoch": 2.8285257817026284, "grad_norm": 0.075943728040945, "learning_rate": 8.341608983207978e-05, "loss": 0.8405, "step": 161110 }, { "epoch": 2.82870134658263, "grad_norm": 0.07144087086614326, "learning_rate": 8.34051234945497e-05, "loss": 0.8382, "step": 161120 }, { "epoch": 2.828876911462631, "grad_norm": 0.049003354466764945, "learning_rate": 8.33941573799501e-05, "loss": 0.8476, "step": 161130 }, { "epoch": 2.8290524763426323, "grad_norm": 0.06304019585611115, "learning_rate": 8.338319148841984e-05, "loss": 0.8359, "step": 161140 }, { "epoch": 2.829228041222634, "grad_norm": 0.04821670338258324, "learning_rate": 8.337222582009799e-05, "loss": 0.8478, "step": 161150 }, { "epoch": 2.8294036061026353, "grad_norm": 0.04222361871720223, "learning_rate": 8.336126037512339e-05, "loss": 0.834, "step": 161160 }, { "epoch": 2.829579170982637, "grad_norm": 0.0641254436980975, "learning_rate": 8.3350295153635e-05, "loss": 0.8408, "step": 161170 }, { "epoch": 2.829754735862638, "grad_norm": 0.09679599544049684, "learning_rate": 8.333933015577172e-05, "loss": 0.8412, "step": 161180 }, { "epoch": 2.8299303007426393, "grad_norm": 0.05031183709937867, "learning_rate": 8.332836538167255e-05, "loss": 0.8408, "step": 161190 }, { "epoch": 2.8301058656226408, "grad_norm": 0.04880848014918155, "learning_rate": 8.331740083147638e-05, "loss": 0.8386, "step": 161200 }, { "epoch": 2.8302814305026422, "grad_norm": 0.0546806108383019, "learning_rate": 8.33064365053221e-05, "loss": 0.8396, "step": 161210 }, { "epoch": 2.8304569953826437, "grad_norm": 0.04944813041339866, "learning_rate": 8.329547240334869e-05, "loss": 0.8416, "step": 161220 }, { "epoch": 2.830632560262645, "grad_norm": 0.07125638484630656, "learning_rate": 8.328450852569503e-05, "loss": 0.8385, "step": 161230 }, { "epoch": 2.8308081251426467, "grad_norm": 0.056433657817796426, "learning_rate": 8.32735448725e-05, "loss": 0.8411, "step": 161240 }, { "epoch": 2.8309836900226477, "grad_norm": 0.06300921345140825, "learning_rate": 8.32625814439026e-05, "loss": 0.8426, "step": 161250 }, { "epoch": 2.831159254902649, "grad_norm": 0.05106785909013939, "learning_rate": 8.32516182400417e-05, "loss": 0.8423, "step": 161260 }, { "epoch": 2.8313348197826507, "grad_norm": 0.05989199990080894, "learning_rate": 8.324065526105619e-05, "loss": 0.8405, "step": 161270 }, { "epoch": 2.831510384662652, "grad_norm": 0.05336664831684142, "learning_rate": 8.322969250708497e-05, "loss": 0.8386, "step": 161280 }, { "epoch": 2.8316859495426536, "grad_norm": 0.054615930934829755, "learning_rate": 8.321872997826696e-05, "loss": 0.8497, "step": 161290 }, { "epoch": 2.8318615144226547, "grad_norm": 0.04819567333642558, "learning_rate": 8.320776767474105e-05, "loss": 0.8455, "step": 161300 }, { "epoch": 2.832037079302656, "grad_norm": 0.04633640998796185, "learning_rate": 8.31968055966461e-05, "loss": 0.8405, "step": 161310 }, { "epoch": 2.8322126441826576, "grad_norm": 0.04818204050425656, "learning_rate": 8.318584374412109e-05, "loss": 0.8443, "step": 161320 }, { "epoch": 2.832388209062659, "grad_norm": 0.04548776383061095, "learning_rate": 8.317488211730483e-05, "loss": 0.8473, "step": 161330 }, { "epoch": 2.8325637739426606, "grad_norm": 0.055742034853920513, "learning_rate": 8.316392071633624e-05, "loss": 0.8441, "step": 161340 }, { "epoch": 2.832739338822662, "grad_norm": 0.04621637588001382, "learning_rate": 8.315295954135415e-05, "loss": 0.8437, "step": 161350 }, { "epoch": 2.8329149037026635, "grad_norm": 0.05214606693068586, "learning_rate": 8.314199859249752e-05, "loss": 0.8488, "step": 161360 }, { "epoch": 2.8330904685826646, "grad_norm": 0.05342567359676524, "learning_rate": 8.313103786990518e-05, "loss": 0.8477, "step": 161370 }, { "epoch": 2.833266033462666, "grad_norm": 0.05732042329931968, "learning_rate": 8.312007737371601e-05, "loss": 0.838, "step": 161380 }, { "epoch": 2.8334415983426675, "grad_norm": 0.042875719247276387, "learning_rate": 8.310911710406891e-05, "loss": 0.844, "step": 161390 }, { "epoch": 2.833617163222669, "grad_norm": 0.05629729384410281, "learning_rate": 8.309815706110271e-05, "loss": 0.8379, "step": 161400 }, { "epoch": 2.8337927281026705, "grad_norm": 0.045939402641018305, "learning_rate": 8.308719724495625e-05, "loss": 0.8451, "step": 161410 }, { "epoch": 2.8339682929826715, "grad_norm": 0.054227105442147844, "learning_rate": 8.30762376557685e-05, "loss": 0.8528, "step": 161420 }, { "epoch": 2.8341438578626734, "grad_norm": 0.05159681739641528, "learning_rate": 8.306527829367821e-05, "loss": 0.835, "step": 161430 }, { "epoch": 2.8343194227426745, "grad_norm": 0.048048390966518056, "learning_rate": 8.305431915882432e-05, "loss": 0.8349, "step": 161440 }, { "epoch": 2.834494987622676, "grad_norm": 0.06703813625824395, "learning_rate": 8.304336025134561e-05, "loss": 0.8408, "step": 161450 }, { "epoch": 2.8346705525026774, "grad_norm": 0.07120008713807575, "learning_rate": 8.3032401571381e-05, "loss": 0.8393, "step": 161460 }, { "epoch": 2.834846117382679, "grad_norm": 0.059988475697522324, "learning_rate": 8.302144311906929e-05, "loss": 0.839, "step": 161470 }, { "epoch": 2.8350216822626804, "grad_norm": 0.05178536138157026, "learning_rate": 8.301048489454931e-05, "loss": 0.845, "step": 161480 }, { "epoch": 2.8351972471426814, "grad_norm": 0.042932550769479556, "learning_rate": 8.299952689795999e-05, "loss": 0.8503, "step": 161490 }, { "epoch": 2.835372812022683, "grad_norm": 0.051540638972817215, "learning_rate": 8.29885691294401e-05, "loss": 0.8396, "step": 161500 }, { "epoch": 2.8355483769026844, "grad_norm": 0.057603776614736237, "learning_rate": 8.297761158912846e-05, "loss": 0.8523, "step": 161510 }, { "epoch": 2.835723941782686, "grad_norm": 0.054455439314500365, "learning_rate": 8.296665427716393e-05, "loss": 0.8425, "step": 161520 }, { "epoch": 2.8358995066626873, "grad_norm": 0.05497401025630339, "learning_rate": 8.295569719368537e-05, "loss": 0.8375, "step": 161530 }, { "epoch": 2.8360750715426883, "grad_norm": 0.04273922057684765, "learning_rate": 8.29447403388316e-05, "loss": 0.8477, "step": 161540 }, { "epoch": 2.8362506364226903, "grad_norm": 0.045899771442003974, "learning_rate": 8.29337837127414e-05, "loss": 0.8365, "step": 161550 }, { "epoch": 2.8364262013026913, "grad_norm": 0.05099709320510753, "learning_rate": 8.292282731555365e-05, "loss": 0.8426, "step": 161560 }, { "epoch": 2.8366017661826928, "grad_norm": 0.05440906384108579, "learning_rate": 8.291187114740713e-05, "loss": 0.8423, "step": 161570 }, { "epoch": 2.8367773310626943, "grad_norm": 0.0489468974967421, "learning_rate": 8.290091520844063e-05, "loss": 0.8435, "step": 161580 }, { "epoch": 2.8369528959426957, "grad_norm": 0.07604436633873468, "learning_rate": 8.288995949879307e-05, "loss": 0.8422, "step": 161590 }, { "epoch": 2.837128460822697, "grad_norm": 0.05191956805488617, "learning_rate": 8.287900401860316e-05, "loss": 0.8522, "step": 161600 }, { "epoch": 2.8373040257026982, "grad_norm": 0.06790531270237672, "learning_rate": 8.286804876800977e-05, "loss": 0.8424, "step": 161610 }, { "epoch": 2.8374795905826997, "grad_norm": 0.05377815333362147, "learning_rate": 8.285709374715163e-05, "loss": 0.8381, "step": 161620 }, { "epoch": 2.837655155462701, "grad_norm": 0.04891461672490368, "learning_rate": 8.284613895616763e-05, "loss": 0.8398, "step": 161630 }, { "epoch": 2.8378307203427027, "grad_norm": 0.05765333970875278, "learning_rate": 8.28351843951965e-05, "loss": 0.8339, "step": 161640 }, { "epoch": 2.838006285222704, "grad_norm": 0.05066845573257661, "learning_rate": 8.282423006437706e-05, "loss": 0.8461, "step": 161650 }, { "epoch": 2.838181850102705, "grad_norm": 0.05164358603181113, "learning_rate": 8.281327596384816e-05, "loss": 0.8389, "step": 161660 }, { "epoch": 2.838357414982707, "grad_norm": 0.05853827701392623, "learning_rate": 8.280232209374851e-05, "loss": 0.8405, "step": 161670 }, { "epoch": 2.838532979862708, "grad_norm": 0.057493381565370254, "learning_rate": 8.279136845421694e-05, "loss": 0.8501, "step": 161680 }, { "epoch": 2.8387085447427096, "grad_norm": 0.0621057140740637, "learning_rate": 8.278041504539216e-05, "loss": 0.8334, "step": 161690 }, { "epoch": 2.838884109622711, "grad_norm": 0.07335919981819886, "learning_rate": 8.276946186741306e-05, "loss": 0.8445, "step": 161700 }, { "epoch": 2.8390596745027126, "grad_norm": 0.050989536919732584, "learning_rate": 8.275850892041838e-05, "loss": 0.8403, "step": 161710 }, { "epoch": 2.839235239382714, "grad_norm": 0.07052997982731116, "learning_rate": 8.274755620454686e-05, "loss": 0.8407, "step": 161720 }, { "epoch": 2.839410804262715, "grad_norm": 0.05714342611283918, "learning_rate": 8.273660371993733e-05, "loss": 0.8434, "step": 161730 }, { "epoch": 2.8395863691427166, "grad_norm": 0.04943065958952161, "learning_rate": 8.272565146672851e-05, "loss": 0.8476, "step": 161740 }, { "epoch": 2.839761934022718, "grad_norm": 0.05001586954645202, "learning_rate": 8.271469944505917e-05, "loss": 0.8462, "step": 161750 }, { "epoch": 2.8399374989027195, "grad_norm": 0.06562455709767095, "learning_rate": 8.270374765506813e-05, "loss": 0.8372, "step": 161760 }, { "epoch": 2.840113063782721, "grad_norm": 0.04571430575003914, "learning_rate": 8.269279609689411e-05, "loss": 0.8455, "step": 161770 }, { "epoch": 2.8402886286627225, "grad_norm": 0.05348535368597109, "learning_rate": 8.268184477067586e-05, "loss": 0.8391, "step": 161780 }, { "epoch": 2.840464193542724, "grad_norm": 0.06045641150486199, "learning_rate": 8.267089367655215e-05, "loss": 0.8376, "step": 161790 }, { "epoch": 2.840639758422725, "grad_norm": 0.053890183527500614, "learning_rate": 8.265994281466173e-05, "loss": 0.8457, "step": 161800 }, { "epoch": 2.8408153233027265, "grad_norm": 0.04276363932334596, "learning_rate": 8.264899218514336e-05, "loss": 0.8426, "step": 161810 }, { "epoch": 2.840990888182728, "grad_norm": 0.05370237270740511, "learning_rate": 8.263804178813574e-05, "loss": 0.8383, "step": 161820 }, { "epoch": 2.8411664530627294, "grad_norm": 0.06349509882973133, "learning_rate": 8.262709162377769e-05, "loss": 0.848, "step": 161830 }, { "epoch": 2.841342017942731, "grad_norm": 0.06307543752785787, "learning_rate": 8.261614169220793e-05, "loss": 0.837, "step": 161840 }, { "epoch": 2.841517582822732, "grad_norm": 0.055538084809664094, "learning_rate": 8.260519199356513e-05, "loss": 0.8417, "step": 161850 }, { "epoch": 2.8416931477027334, "grad_norm": 0.06963862828543421, "learning_rate": 8.259424252798806e-05, "loss": 0.8452, "step": 161860 }, { "epoch": 2.841868712582735, "grad_norm": 0.04981898396865155, "learning_rate": 8.258329329561549e-05, "loss": 0.8394, "step": 161870 }, { "epoch": 2.8420442774627364, "grad_norm": 0.06577278843000728, "learning_rate": 8.257234429658616e-05, "loss": 0.8421, "step": 161880 }, { "epoch": 2.842219842342738, "grad_norm": 0.06773542502098678, "learning_rate": 8.25613955310387e-05, "loss": 0.8421, "step": 161890 }, { "epoch": 2.8423954072227393, "grad_norm": 0.047970059472273216, "learning_rate": 8.255044699911192e-05, "loss": 0.8511, "step": 161900 }, { "epoch": 2.842570972102741, "grad_norm": 0.060730390263423244, "learning_rate": 8.253949870094453e-05, "loss": 0.8386, "step": 161910 }, { "epoch": 2.842746536982742, "grad_norm": 0.04989283469599602, "learning_rate": 8.252855063667517e-05, "loss": 0.8478, "step": 161920 }, { "epoch": 2.8429221018627433, "grad_norm": 0.050557443333727935, "learning_rate": 8.251760280644266e-05, "loss": 0.8416, "step": 161930 }, { "epoch": 2.8430976667427448, "grad_norm": 0.06063688754617192, "learning_rate": 8.250665521038566e-05, "loss": 0.8464, "step": 161940 }, { "epoch": 2.8432732316227463, "grad_norm": 0.07570491864730164, "learning_rate": 8.249570784864289e-05, "loss": 0.8478, "step": 161950 }, { "epoch": 2.8434487965027477, "grad_norm": 0.06903430478739418, "learning_rate": 8.248476072135302e-05, "loss": 0.8382, "step": 161960 }, { "epoch": 2.8436243613827488, "grad_norm": 0.057449822209230006, "learning_rate": 8.247381382865481e-05, "loss": 0.8384, "step": 161970 }, { "epoch": 2.8437999262627502, "grad_norm": 0.04790329099495463, "learning_rate": 8.246286717068693e-05, "loss": 0.844, "step": 161980 }, { "epoch": 2.8439754911427517, "grad_norm": 0.05456021561389492, "learning_rate": 8.245192074758802e-05, "loss": 0.8323, "step": 161990 }, { "epoch": 2.844151056022753, "grad_norm": 0.044150882708937134, "learning_rate": 8.244097455949691e-05, "loss": 0.8345, "step": 162000 }, { "epoch": 2.8443266209027547, "grad_norm": 0.061365970101624485, "learning_rate": 8.243002860655217e-05, "loss": 0.8435, "step": 162010 }, { "epoch": 2.844502185782756, "grad_norm": 0.049108490992043154, "learning_rate": 8.241908288889254e-05, "loss": 0.8466, "step": 162020 }, { "epoch": 2.8446777506627576, "grad_norm": 0.04934576339567541, "learning_rate": 8.240813740665665e-05, "loss": 0.8391, "step": 162030 }, { "epoch": 2.8448533155427587, "grad_norm": 0.0481653560124774, "learning_rate": 8.239719215998326e-05, "loss": 0.8442, "step": 162040 }, { "epoch": 2.84502888042276, "grad_norm": 0.04840076522613405, "learning_rate": 8.2386247149011e-05, "loss": 0.8406, "step": 162050 }, { "epoch": 2.8452044453027616, "grad_norm": 0.061527903275703684, "learning_rate": 8.237530237387854e-05, "loss": 0.8385, "step": 162060 }, { "epoch": 2.845380010182763, "grad_norm": 0.05804447228935628, "learning_rate": 8.236435783472459e-05, "loss": 0.8405, "step": 162070 }, { "epoch": 2.8455555750627646, "grad_norm": 0.048735179238661865, "learning_rate": 8.23534135316878e-05, "loss": 0.8447, "step": 162080 }, { "epoch": 2.8457311399427656, "grad_norm": 0.06534405839571197, "learning_rate": 8.234246946490679e-05, "loss": 0.832, "step": 162090 }, { "epoch": 2.8459067048227675, "grad_norm": 0.04767505760847162, "learning_rate": 8.233152563452032e-05, "loss": 0.8382, "step": 162100 }, { "epoch": 2.8460822697027686, "grad_norm": 0.05740396988213716, "learning_rate": 8.232058204066699e-05, "loss": 0.8471, "step": 162110 }, { "epoch": 2.84625783458277, "grad_norm": 0.05784867177467993, "learning_rate": 8.230963868348547e-05, "loss": 0.8397, "step": 162120 }, { "epoch": 2.8464333994627715, "grad_norm": 0.048418751412068, "learning_rate": 8.229869556311438e-05, "loss": 0.8409, "step": 162130 }, { "epoch": 2.846608964342773, "grad_norm": 0.04359847623543304, "learning_rate": 8.228775267969244e-05, "loss": 0.8458, "step": 162140 }, { "epoch": 2.8467845292227745, "grad_norm": 0.05461070574660009, "learning_rate": 8.227681003335824e-05, "loss": 0.8416, "step": 162150 }, { "epoch": 2.8469600941027755, "grad_norm": 0.04721975327462989, "learning_rate": 8.226586762425041e-05, "loss": 0.8453, "step": 162160 }, { "epoch": 2.847135658982777, "grad_norm": 0.0529950570799471, "learning_rate": 8.22549254525077e-05, "loss": 0.8462, "step": 162170 }, { "epoch": 2.8473112238627785, "grad_norm": 0.04584626356035472, "learning_rate": 8.224398351826864e-05, "loss": 0.8467, "step": 162180 }, { "epoch": 2.84748678874278, "grad_norm": 0.06754019899847151, "learning_rate": 8.223304182167191e-05, "loss": 0.8437, "step": 162190 }, { "epoch": 2.8476623536227814, "grad_norm": 0.0564330543232566, "learning_rate": 8.222210036285609e-05, "loss": 0.8446, "step": 162200 }, { "epoch": 2.8478379185027825, "grad_norm": 0.03901778955614253, "learning_rate": 8.221115914195991e-05, "loss": 0.8471, "step": 162210 }, { "epoch": 2.8480134833827844, "grad_norm": 0.04232536870274305, "learning_rate": 8.220021815912193e-05, "loss": 0.8463, "step": 162220 }, { "epoch": 2.8481890482627854, "grad_norm": 0.04309869886852857, "learning_rate": 8.218927741448079e-05, "loss": 0.8401, "step": 162230 }, { "epoch": 2.848364613142787, "grad_norm": 0.06819919043603538, "learning_rate": 8.21783369081751e-05, "loss": 0.8427, "step": 162240 }, { "epoch": 2.8485401780227884, "grad_norm": 0.05102213578603289, "learning_rate": 8.216739664034352e-05, "loss": 0.842, "step": 162250 }, { "epoch": 2.84871574290279, "grad_norm": 0.05593126484714346, "learning_rate": 8.215645661112457e-05, "loss": 0.8423, "step": 162260 }, { "epoch": 2.8488913077827913, "grad_norm": 0.05405296895021275, "learning_rate": 8.214551682065697e-05, "loss": 0.8446, "step": 162270 }, { "epoch": 2.8490668726627923, "grad_norm": 0.055243858955163545, "learning_rate": 8.21345772690793e-05, "loss": 0.8337, "step": 162280 }, { "epoch": 2.849242437542794, "grad_norm": 0.05704205494363902, "learning_rate": 8.212363795653015e-05, "loss": 0.8361, "step": 162290 }, { "epoch": 2.8494180024227953, "grad_norm": 0.059050960496171485, "learning_rate": 8.211269888314809e-05, "loss": 0.8432, "step": 162300 }, { "epoch": 2.849593567302797, "grad_norm": 0.058757388581464204, "learning_rate": 8.210176004907178e-05, "loss": 0.8458, "step": 162310 }, { "epoch": 2.8497691321827983, "grad_norm": 0.045474684258182636, "learning_rate": 8.20908214544398e-05, "loss": 0.8344, "step": 162320 }, { "epoch": 2.8499446970627993, "grad_norm": 0.051263066291495366, "learning_rate": 8.207988309939068e-05, "loss": 0.837, "step": 162330 }, { "epoch": 2.850120261942801, "grad_norm": 0.05158000043847049, "learning_rate": 8.206894498406314e-05, "loss": 0.8434, "step": 162340 }, { "epoch": 2.8502958268228022, "grad_norm": 0.057232470353888246, "learning_rate": 8.205800710859567e-05, "loss": 0.8458, "step": 162350 }, { "epoch": 2.8504713917028037, "grad_norm": 0.05043335886361356, "learning_rate": 8.204706947312689e-05, "loss": 0.846, "step": 162360 }, { "epoch": 2.850646956582805, "grad_norm": 0.048233414457965654, "learning_rate": 8.203613207779531e-05, "loss": 0.8432, "step": 162370 }, { "epoch": 2.8508225214628067, "grad_norm": 0.04325784782450015, "learning_rate": 8.202519492273964e-05, "loss": 0.8422, "step": 162380 }, { "epoch": 2.850998086342808, "grad_norm": 0.04340542349814386, "learning_rate": 8.201425800809837e-05, "loss": 0.842, "step": 162390 }, { "epoch": 2.851173651222809, "grad_norm": 0.06657601465237896, "learning_rate": 8.200332133401008e-05, "loss": 0.8496, "step": 162400 }, { "epoch": 2.8513492161028107, "grad_norm": 0.05181655095759343, "learning_rate": 8.199238490061335e-05, "loss": 0.8425, "step": 162410 }, { "epoch": 2.851524780982812, "grad_norm": 0.06377610841499992, "learning_rate": 8.198144870804677e-05, "loss": 0.8478, "step": 162420 }, { "epoch": 2.8517003458628136, "grad_norm": 0.06373956813880655, "learning_rate": 8.197051275644881e-05, "loss": 0.8454, "step": 162430 }, { "epoch": 2.851875910742815, "grad_norm": 0.04798020461218606, "learning_rate": 8.195957704595816e-05, "loss": 0.8455, "step": 162440 }, { "epoch": 2.852051475622816, "grad_norm": 0.06165376899498941, "learning_rate": 8.194864157671333e-05, "loss": 0.842, "step": 162450 }, { "epoch": 2.852227040502818, "grad_norm": 0.05027855299825563, "learning_rate": 8.193770634885283e-05, "loss": 0.8385, "step": 162460 }, { "epoch": 2.852402605382819, "grad_norm": 0.06189387396670984, "learning_rate": 8.192677136251524e-05, "loss": 0.8407, "step": 162470 }, { "epoch": 2.8525781702628206, "grad_norm": 0.05852379800571084, "learning_rate": 8.191583661783915e-05, "loss": 0.8422, "step": 162480 }, { "epoch": 2.852753735142822, "grad_norm": 0.0668579467830791, "learning_rate": 8.190490211496306e-05, "loss": 0.8424, "step": 162490 }, { "epoch": 2.8529293000228235, "grad_norm": 0.06084699433088085, "learning_rate": 8.189396785402545e-05, "loss": 0.8504, "step": 162500 }, { "epoch": 2.853104864902825, "grad_norm": 0.04643516722405198, "learning_rate": 8.188303383516502e-05, "loss": 0.8419, "step": 162510 }, { "epoch": 2.853280429782826, "grad_norm": 0.04427016925593285, "learning_rate": 8.187210005852019e-05, "loss": 0.8408, "step": 162520 }, { "epoch": 2.8534559946628275, "grad_norm": 0.04654805850499281, "learning_rate": 8.18611665242295e-05, "loss": 0.8416, "step": 162530 }, { "epoch": 2.853631559542829, "grad_norm": 0.04931074456185344, "learning_rate": 8.185023323243148e-05, "loss": 0.8461, "step": 162540 }, { "epoch": 2.8538071244228305, "grad_norm": 0.059975398576798115, "learning_rate": 8.18393001832647e-05, "loss": 0.8433, "step": 162550 }, { "epoch": 2.853982689302832, "grad_norm": 0.047896083186611524, "learning_rate": 8.182836737686766e-05, "loss": 0.8437, "step": 162560 }, { "epoch": 2.8541582541828334, "grad_norm": 0.03806441206115155, "learning_rate": 8.181743481337888e-05, "loss": 0.8418, "step": 162570 }, { "epoch": 2.854333819062835, "grad_norm": 0.05583155728995068, "learning_rate": 8.180650249293688e-05, "loss": 0.8391, "step": 162580 }, { "epoch": 2.854509383942836, "grad_norm": 0.06705698091978694, "learning_rate": 8.179557041568018e-05, "loss": 0.8424, "step": 162590 }, { "epoch": 2.8546849488228374, "grad_norm": 0.061874443607374485, "learning_rate": 8.178463858174723e-05, "loss": 0.8368, "step": 162600 }, { "epoch": 2.854860513702839, "grad_norm": 0.05068795317931121, "learning_rate": 8.177370699127663e-05, "loss": 0.845, "step": 162610 }, { "epoch": 2.8550360785828404, "grad_norm": 0.05220231853010955, "learning_rate": 8.176277564440686e-05, "loss": 0.8437, "step": 162620 }, { "epoch": 2.855211643462842, "grad_norm": 0.06299384803963559, "learning_rate": 8.175184454127641e-05, "loss": 0.8479, "step": 162630 }, { "epoch": 2.855387208342843, "grad_norm": 0.05265288454350242, "learning_rate": 8.174091368202376e-05, "loss": 0.8409, "step": 162640 }, { "epoch": 2.8555627732228444, "grad_norm": 0.05068697016570266, "learning_rate": 8.172998306678743e-05, "loss": 0.8431, "step": 162650 }, { "epoch": 2.855738338102846, "grad_norm": 0.05939998687945758, "learning_rate": 8.171905269570592e-05, "loss": 0.8428, "step": 162660 }, { "epoch": 2.8559139029828473, "grad_norm": 0.05572139825493476, "learning_rate": 8.170812256891766e-05, "loss": 0.8452, "step": 162670 }, { "epoch": 2.856089467862849, "grad_norm": 0.0504388906431784, "learning_rate": 8.169719268656126e-05, "loss": 0.8389, "step": 162680 }, { "epoch": 2.8562650327428503, "grad_norm": 0.05848367196557318, "learning_rate": 8.168626304877508e-05, "loss": 0.8333, "step": 162690 }, { "epoch": 2.8564405976228517, "grad_norm": 0.06876380835322635, "learning_rate": 8.167533365569768e-05, "loss": 0.8362, "step": 162700 }, { "epoch": 2.8566161625028528, "grad_norm": 0.049126938089931405, "learning_rate": 8.166440450746745e-05, "loss": 0.8497, "step": 162710 }, { "epoch": 2.8567917273828543, "grad_norm": 0.052511292376052156, "learning_rate": 8.165347560422296e-05, "loss": 0.8446, "step": 162720 }, { "epoch": 2.8569672922628557, "grad_norm": 0.06256450476223914, "learning_rate": 8.164254694610264e-05, "loss": 0.8303, "step": 162730 }, { "epoch": 2.857142857142857, "grad_norm": 0.0549613630081144, "learning_rate": 8.163161853324495e-05, "loss": 0.8492, "step": 162740 }, { "epoch": 2.8573184220228587, "grad_norm": 0.05192751145455247, "learning_rate": 8.162069036578837e-05, "loss": 0.8372, "step": 162750 }, { "epoch": 2.8574939869028597, "grad_norm": 0.08167884062093288, "learning_rate": 8.160976244387137e-05, "loss": 0.848, "step": 162760 }, { "epoch": 2.857669551782861, "grad_norm": 0.057468442838540065, "learning_rate": 8.159883476763237e-05, "loss": 0.8375, "step": 162770 }, { "epoch": 2.8578451166628627, "grad_norm": 0.04778123679699133, "learning_rate": 8.158790733720986e-05, "loss": 0.8448, "step": 162780 }, { "epoch": 2.858020681542864, "grad_norm": 0.06323057607837164, "learning_rate": 8.157698015274229e-05, "loss": 0.8336, "step": 162790 }, { "epoch": 2.8581962464228656, "grad_norm": 0.0500788293298188, "learning_rate": 8.15660532143681e-05, "loss": 0.8392, "step": 162800 }, { "epoch": 2.858371811302867, "grad_norm": 0.07453984394980245, "learning_rate": 8.155512652222576e-05, "loss": 0.8476, "step": 162810 }, { "epoch": 2.8585473761828686, "grad_norm": 0.04918525062631145, "learning_rate": 8.154420007645367e-05, "loss": 0.8446, "step": 162820 }, { "epoch": 2.8587229410628696, "grad_norm": 0.04869536779840181, "learning_rate": 8.153327387719032e-05, "loss": 0.8451, "step": 162830 }, { "epoch": 2.858898505942871, "grad_norm": 0.057795593043404724, "learning_rate": 8.152234792457406e-05, "loss": 0.846, "step": 162840 }, { "epoch": 2.8590740708228726, "grad_norm": 0.0450387358080175, "learning_rate": 8.151142221874346e-05, "loss": 0.8337, "step": 162850 }, { "epoch": 2.859249635702874, "grad_norm": 0.0626789043935756, "learning_rate": 8.150049675983684e-05, "loss": 0.8383, "step": 162860 }, { "epoch": 2.8594252005828755, "grad_norm": 0.05638264363038742, "learning_rate": 8.148957154799268e-05, "loss": 0.8411, "step": 162870 }, { "epoch": 2.8596007654628766, "grad_norm": 0.04698034709406854, "learning_rate": 8.147864658334933e-05, "loss": 0.838, "step": 162880 }, { "epoch": 2.8597763303428785, "grad_norm": 0.0722142086377913, "learning_rate": 8.146772186604532e-05, "loss": 0.8414, "step": 162890 }, { "epoch": 2.8599518952228795, "grad_norm": 0.05024195956629383, "learning_rate": 8.145679739621903e-05, "loss": 0.8385, "step": 162900 }, { "epoch": 2.860127460102881, "grad_norm": 0.054024809805604444, "learning_rate": 8.144587317400882e-05, "loss": 0.8423, "step": 162910 }, { "epoch": 2.8603030249828825, "grad_norm": 0.06405160886122989, "learning_rate": 8.143494919955319e-05, "loss": 0.8426, "step": 162920 }, { "epoch": 2.860478589862884, "grad_norm": 0.07151645960102027, "learning_rate": 8.142402547299047e-05, "loss": 0.85, "step": 162930 }, { "epoch": 2.8606541547428854, "grad_norm": 0.059535665381651566, "learning_rate": 8.141310199445908e-05, "loss": 0.8363, "step": 162940 }, { "epoch": 2.8608297196228865, "grad_norm": 0.05272226771435376, "learning_rate": 8.140217876409748e-05, "loss": 0.846, "step": 162950 }, { "epoch": 2.861005284502888, "grad_norm": 0.052183812097720925, "learning_rate": 8.139125578204403e-05, "loss": 0.8449, "step": 162960 }, { "epoch": 2.8611808493828894, "grad_norm": 0.06616577647812245, "learning_rate": 8.138033304843712e-05, "loss": 0.8378, "step": 162970 }, { "epoch": 2.861356414262891, "grad_norm": 0.048023629795010056, "learning_rate": 8.136941056341513e-05, "loss": 0.8415, "step": 162980 }, { "epoch": 2.8615319791428924, "grad_norm": 0.0431467722183038, "learning_rate": 8.13584883271165e-05, "loss": 0.8381, "step": 162990 }, { "epoch": 2.8617075440228934, "grad_norm": 0.05246154289572494, "learning_rate": 8.13475663396796e-05, "loss": 0.8421, "step": 163000 }, { "epoch": 2.8618831089028953, "grad_norm": 0.06361487137104242, "learning_rate": 8.133664460124273e-05, "loss": 0.8434, "step": 163010 }, { "epoch": 2.8620586737828964, "grad_norm": 0.0708313659868788, "learning_rate": 8.132572311194441e-05, "loss": 0.8426, "step": 163020 }, { "epoch": 2.862234238662898, "grad_norm": 0.04956245823427152, "learning_rate": 8.131480187192295e-05, "loss": 0.8425, "step": 163030 }, { "epoch": 2.8624098035428993, "grad_norm": 0.057041988982654404, "learning_rate": 8.13038808813167e-05, "loss": 0.8416, "step": 163040 }, { "epoch": 2.862585368422901, "grad_norm": 0.06379251884335417, "learning_rate": 8.129296014026401e-05, "loss": 0.8442, "step": 163050 }, { "epoch": 2.8627609333029023, "grad_norm": 0.04755739622755285, "learning_rate": 8.128203964890337e-05, "loss": 0.84, "step": 163060 }, { "epoch": 2.8629364981829033, "grad_norm": 0.05182591020423575, "learning_rate": 8.127111940737303e-05, "loss": 0.8458, "step": 163070 }, { "epoch": 2.8631120630629048, "grad_norm": 0.05334332728264644, "learning_rate": 8.126019941581138e-05, "loss": 0.8351, "step": 163080 }, { "epoch": 2.8632876279429063, "grad_norm": 0.04600240054878718, "learning_rate": 8.12492796743568e-05, "loss": 0.8376, "step": 163090 }, { "epoch": 2.8634631928229077, "grad_norm": 0.05348331849959821, "learning_rate": 8.123836018314764e-05, "loss": 0.8395, "step": 163100 }, { "epoch": 2.863638757702909, "grad_norm": 0.06064516718761185, "learning_rate": 8.12274409423222e-05, "loss": 0.8426, "step": 163110 }, { "epoch": 2.8638143225829102, "grad_norm": 0.07285671059213056, "learning_rate": 8.121652195201892e-05, "loss": 0.8464, "step": 163120 }, { "epoch": 2.863989887462912, "grad_norm": 0.05525504850931629, "learning_rate": 8.120560321237608e-05, "loss": 0.8433, "step": 163130 }, { "epoch": 2.864165452342913, "grad_norm": 0.06398093264421183, "learning_rate": 8.119468472353206e-05, "loss": 0.8434, "step": 163140 }, { "epoch": 2.8643410172229147, "grad_norm": 0.05594982036241478, "learning_rate": 8.118376648562514e-05, "loss": 0.8447, "step": 163150 }, { "epoch": 2.864516582102916, "grad_norm": 0.05859826752301311, "learning_rate": 8.117284849879373e-05, "loss": 0.8433, "step": 163160 }, { "epoch": 2.8646921469829176, "grad_norm": 0.10678554876006183, "learning_rate": 8.116193076317613e-05, "loss": 0.8459, "step": 163170 }, { "epoch": 2.864867711862919, "grad_norm": 0.05813428486022015, "learning_rate": 8.115101327891063e-05, "loss": 0.8384, "step": 163180 }, { "epoch": 2.86504327674292, "grad_norm": 0.046226082818739696, "learning_rate": 8.114009604613564e-05, "loss": 0.8456, "step": 163190 }, { "epoch": 2.8652188416229216, "grad_norm": 0.05741284212655288, "learning_rate": 8.112917906498945e-05, "loss": 0.8518, "step": 163200 }, { "epoch": 2.865394406502923, "grad_norm": 0.048346226437310874, "learning_rate": 8.111826233561035e-05, "loss": 0.8505, "step": 163210 }, { "epoch": 2.8655699713829246, "grad_norm": 0.06683782646608724, "learning_rate": 8.110734585813664e-05, "loss": 0.8414, "step": 163220 }, { "epoch": 2.865745536262926, "grad_norm": 0.045426886313238976, "learning_rate": 8.10964296327067e-05, "loss": 0.8408, "step": 163230 }, { "epoch": 2.8659211011429275, "grad_norm": 0.06033805810401283, "learning_rate": 8.108551365945884e-05, "loss": 0.8562, "step": 163240 }, { "epoch": 2.866096666022929, "grad_norm": 0.05266856444507684, "learning_rate": 8.107459793853129e-05, "loss": 0.8488, "step": 163250 }, { "epoch": 2.86627223090293, "grad_norm": 0.08339892545292522, "learning_rate": 8.106368247006243e-05, "loss": 0.8341, "step": 163260 }, { "epoch": 2.8664477957829315, "grad_norm": 0.05614205030508958, "learning_rate": 8.105276725419053e-05, "loss": 0.8394, "step": 163270 }, { "epoch": 2.866623360662933, "grad_norm": 0.049346279324845366, "learning_rate": 8.104185229105385e-05, "loss": 0.8434, "step": 163280 }, { "epoch": 2.8667989255429345, "grad_norm": 0.05092970896104869, "learning_rate": 8.103093758079078e-05, "loss": 0.8476, "step": 163290 }, { "epoch": 2.866974490422936, "grad_norm": 0.052788375025162246, "learning_rate": 8.102002312353956e-05, "loss": 0.8449, "step": 163300 }, { "epoch": 2.867150055302937, "grad_norm": 0.07002463118425746, "learning_rate": 8.100910891943847e-05, "loss": 0.8405, "step": 163310 }, { "epoch": 2.8673256201829385, "grad_norm": 0.05196463676732262, "learning_rate": 8.099819496862576e-05, "loss": 0.8407, "step": 163320 }, { "epoch": 2.86750118506294, "grad_norm": 0.05830463412388009, "learning_rate": 8.09872812712398e-05, "loss": 0.8401, "step": 163330 }, { "epoch": 2.8676767499429414, "grad_norm": 0.04965013704044104, "learning_rate": 8.097636782741881e-05, "loss": 0.8428, "step": 163340 }, { "epoch": 2.867852314822943, "grad_norm": 0.05435896288585626, "learning_rate": 8.096545463730107e-05, "loss": 0.8504, "step": 163350 }, { "epoch": 2.8680278797029444, "grad_norm": 0.05605073974557314, "learning_rate": 8.095454170102487e-05, "loss": 0.8469, "step": 163360 }, { "epoch": 2.868203444582946, "grad_norm": 0.054974432845167336, "learning_rate": 8.094362901872846e-05, "loss": 0.833, "step": 163370 }, { "epoch": 2.868379009462947, "grad_norm": 0.04597222943413021, "learning_rate": 8.093271659055013e-05, "loss": 0.8416, "step": 163380 }, { "epoch": 2.8685545743429484, "grad_norm": 0.06056814201321721, "learning_rate": 8.092180441662808e-05, "loss": 0.8456, "step": 163390 }, { "epoch": 2.86873013922295, "grad_norm": 0.049990943562034706, "learning_rate": 8.091089249710065e-05, "loss": 0.8372, "step": 163400 }, { "epoch": 2.8689057041029513, "grad_norm": 0.051530910982830226, "learning_rate": 8.089998083210607e-05, "loss": 0.8436, "step": 163410 }, { "epoch": 2.869081268982953, "grad_norm": 0.057118034433227945, "learning_rate": 8.088906942178256e-05, "loss": 0.8393, "step": 163420 }, { "epoch": 2.869256833862954, "grad_norm": 0.04717821686598869, "learning_rate": 8.087815826626841e-05, "loss": 0.8359, "step": 163430 }, { "epoch": 2.8694323987429553, "grad_norm": 0.05591925332329214, "learning_rate": 8.086724736570185e-05, "loss": 0.8472, "step": 163440 }, { "epoch": 2.869607963622957, "grad_norm": 0.054229705204863234, "learning_rate": 8.085633672022111e-05, "loss": 0.8389, "step": 163450 }, { "epoch": 2.8697835285029583, "grad_norm": 0.04940609969856351, "learning_rate": 8.084542632996446e-05, "loss": 0.8433, "step": 163460 }, { "epoch": 2.8699590933829597, "grad_norm": 0.044581899690309415, "learning_rate": 8.083451619507013e-05, "loss": 0.8403, "step": 163470 }, { "epoch": 2.870134658262961, "grad_norm": 0.06504490892970459, "learning_rate": 8.082360631567634e-05, "loss": 0.8477, "step": 163480 }, { "epoch": 2.8703102231429627, "grad_norm": 0.062387831742608424, "learning_rate": 8.08126966919213e-05, "loss": 0.842, "step": 163490 }, { "epoch": 2.8704857880229637, "grad_norm": 0.059434905620544674, "learning_rate": 8.08017873239433e-05, "loss": 0.8323, "step": 163500 }, { "epoch": 2.870661352902965, "grad_norm": 0.05444103492686895, "learning_rate": 8.079087821188051e-05, "loss": 0.8471, "step": 163510 }, { "epoch": 2.8708369177829667, "grad_norm": 0.06124977438126976, "learning_rate": 8.077996935587116e-05, "loss": 0.8437, "step": 163520 }, { "epoch": 2.871012482662968, "grad_norm": 0.08425522350222941, "learning_rate": 8.076906075605347e-05, "loss": 0.8367, "step": 163530 }, { "epoch": 2.8711880475429696, "grad_norm": 0.05391009515541444, "learning_rate": 8.075815241256568e-05, "loss": 0.8476, "step": 163540 }, { "epoch": 2.8713636124229707, "grad_norm": 0.056470803750990596, "learning_rate": 8.074724432554596e-05, "loss": 0.8453, "step": 163550 }, { "epoch": 2.8715391773029726, "grad_norm": 0.05005768544533669, "learning_rate": 8.07363364951325e-05, "loss": 0.8473, "step": 163560 }, { "epoch": 2.8717147421829736, "grad_norm": 0.05020742185704079, "learning_rate": 8.07254289214636e-05, "loss": 0.844, "step": 163570 }, { "epoch": 2.871890307062975, "grad_norm": 0.04940352338640399, "learning_rate": 8.071452160467738e-05, "loss": 0.8383, "step": 163580 }, { "epoch": 2.8720658719429766, "grad_norm": 0.06140483222707047, "learning_rate": 8.070361454491205e-05, "loss": 0.8476, "step": 163590 }, { "epoch": 2.872241436822978, "grad_norm": 0.05612778976012454, "learning_rate": 8.069270774230582e-05, "loss": 0.8455, "step": 163600 }, { "epoch": 2.8724170017029795, "grad_norm": 0.06041962494337837, "learning_rate": 8.068180119699689e-05, "loss": 0.8422, "step": 163610 }, { "epoch": 2.8725925665829806, "grad_norm": 0.05754898274970149, "learning_rate": 8.067089490912343e-05, "loss": 0.8452, "step": 163620 }, { "epoch": 2.872768131462982, "grad_norm": 0.05176380510696448, "learning_rate": 8.065998887882358e-05, "loss": 0.8461, "step": 163630 }, { "epoch": 2.8729436963429835, "grad_norm": 0.05555859871069693, "learning_rate": 8.064908310623562e-05, "loss": 0.8401, "step": 163640 }, { "epoch": 2.873119261222985, "grad_norm": 0.0509709137396716, "learning_rate": 8.063817759149767e-05, "loss": 0.8476, "step": 163650 }, { "epoch": 2.8732948261029865, "grad_norm": 0.06647058964496766, "learning_rate": 8.062727233474788e-05, "loss": 0.8382, "step": 163660 }, { "epoch": 2.8734703909829875, "grad_norm": 0.06073905037657786, "learning_rate": 8.061636733612449e-05, "loss": 0.8512, "step": 163670 }, { "epoch": 2.8736459558629894, "grad_norm": 0.04360878303563667, "learning_rate": 8.060546259576564e-05, "loss": 0.8352, "step": 163680 }, { "epoch": 2.8738215207429905, "grad_norm": 0.049097171467727345, "learning_rate": 8.059455811380945e-05, "loss": 0.8353, "step": 163690 }, { "epoch": 2.873997085622992, "grad_norm": 0.05003799599546377, "learning_rate": 8.058365389039415e-05, "loss": 0.8425, "step": 163700 }, { "epoch": 2.8741726505029934, "grad_norm": 0.05288302778477292, "learning_rate": 8.057274992565787e-05, "loss": 0.8535, "step": 163710 }, { "epoch": 2.874348215382995, "grad_norm": 0.06503624905195027, "learning_rate": 8.056184621973875e-05, "loss": 0.8437, "step": 163720 }, { "epoch": 2.8745237802629964, "grad_norm": 0.06120406167658384, "learning_rate": 8.055094277277492e-05, "loss": 0.8404, "step": 163730 }, { "epoch": 2.8746993451429974, "grad_norm": 0.04874130447915148, "learning_rate": 8.05400395849046e-05, "loss": 0.8323, "step": 163740 }, { "epoch": 2.874874910022999, "grad_norm": 0.052575885676632704, "learning_rate": 8.052913665626591e-05, "loss": 0.8417, "step": 163750 }, { "epoch": 2.8750504749030004, "grad_norm": 0.057728949374124934, "learning_rate": 8.051823398699695e-05, "loss": 0.8459, "step": 163760 }, { "epoch": 2.875226039783002, "grad_norm": 0.056920732376910185, "learning_rate": 8.050733157723592e-05, "loss": 0.8461, "step": 163770 }, { "epoch": 2.8754016046630033, "grad_norm": 0.06928049898829447, "learning_rate": 8.049642942712093e-05, "loss": 0.8394, "step": 163780 }, { "epoch": 2.8755771695430044, "grad_norm": 0.0538632900877508, "learning_rate": 8.04855275367901e-05, "loss": 0.8398, "step": 163790 }, { "epoch": 2.8757527344230063, "grad_norm": 0.07533421882953373, "learning_rate": 8.047462590638154e-05, "loss": 0.8456, "step": 163800 }, { "epoch": 2.8759282993030073, "grad_norm": 0.06490630220946883, "learning_rate": 8.046372453603343e-05, "loss": 0.8345, "step": 163810 }, { "epoch": 2.876103864183009, "grad_norm": 0.06790292604353296, "learning_rate": 8.045282342588387e-05, "loss": 0.8441, "step": 163820 }, { "epoch": 2.8762794290630103, "grad_norm": 0.06781292671873275, "learning_rate": 8.044192257607094e-05, "loss": 0.8287, "step": 163830 }, { "epoch": 2.8764549939430117, "grad_norm": 0.05614155479539132, "learning_rate": 8.043102198673283e-05, "loss": 0.8436, "step": 163840 }, { "epoch": 2.876630558823013, "grad_norm": 0.051163105613065835, "learning_rate": 8.042012165800763e-05, "loss": 0.8463, "step": 163850 }, { "epoch": 2.8768061237030143, "grad_norm": 0.04492574894355648, "learning_rate": 8.040922159003339e-05, "loss": 0.8424, "step": 163860 }, { "epoch": 2.8769816885830157, "grad_norm": 0.057647834773502035, "learning_rate": 8.039832178294828e-05, "loss": 0.8444, "step": 163870 }, { "epoch": 2.877157253463017, "grad_norm": 0.046084739090995315, "learning_rate": 8.038742223689038e-05, "loss": 0.8382, "step": 163880 }, { "epoch": 2.8773328183430187, "grad_norm": 0.05420621571128835, "learning_rate": 8.03765229519978e-05, "loss": 0.8374, "step": 163890 }, { "epoch": 2.87750838322302, "grad_norm": 0.053182929121883736, "learning_rate": 8.036562392840858e-05, "loss": 0.8458, "step": 163900 }, { "epoch": 2.877683948103021, "grad_norm": 0.044492395965217056, "learning_rate": 8.035472516626089e-05, "loss": 0.8416, "step": 163910 }, { "epoch": 2.877859512983023, "grad_norm": 0.05213957798619921, "learning_rate": 8.03438266656928e-05, "loss": 0.8452, "step": 163920 }, { "epoch": 2.878035077863024, "grad_norm": 0.04565662268389031, "learning_rate": 8.033292842684237e-05, "loss": 0.8473, "step": 163930 }, { "epoch": 2.8782106427430256, "grad_norm": 0.04857716028988213, "learning_rate": 8.03220304498477e-05, "loss": 0.8342, "step": 163940 }, { "epoch": 2.878386207623027, "grad_norm": 0.0716437913526914, "learning_rate": 8.031113273484689e-05, "loss": 0.8439, "step": 163950 }, { "epoch": 2.8785617725030286, "grad_norm": 0.05451283660295179, "learning_rate": 8.030023528197797e-05, "loss": 0.8442, "step": 163960 }, { "epoch": 2.87873733738303, "grad_norm": 0.06507232167358691, "learning_rate": 8.0289338091379e-05, "loss": 0.8465, "step": 163970 }, { "epoch": 2.878912902263031, "grad_norm": 0.05702443535893696, "learning_rate": 8.027844116318814e-05, "loss": 0.8395, "step": 163980 }, { "epoch": 2.8790884671430326, "grad_norm": 0.04772749741698408, "learning_rate": 8.02675444975434e-05, "loss": 0.8424, "step": 163990 }, { "epoch": 2.879264032023034, "grad_norm": 0.05862817848584737, "learning_rate": 8.025664809458281e-05, "loss": 0.8404, "step": 164000 }, { "epoch": 2.8794395969030355, "grad_norm": 0.06464179080571206, "learning_rate": 8.02457519544445e-05, "loss": 0.8413, "step": 164010 }, { "epoch": 2.879615161783037, "grad_norm": 0.0525854322260668, "learning_rate": 8.023485607726648e-05, "loss": 0.8347, "step": 164020 }, { "epoch": 2.8797907266630385, "grad_norm": 0.049699587562264544, "learning_rate": 8.02239604631868e-05, "loss": 0.8476, "step": 164030 }, { "epoch": 2.87996629154304, "grad_norm": 0.050489485106475246, "learning_rate": 8.021306511234354e-05, "loss": 0.8411, "step": 164040 }, { "epoch": 2.880141856423041, "grad_norm": 0.05773654814520366, "learning_rate": 8.020217002487473e-05, "loss": 0.8442, "step": 164050 }, { "epoch": 2.8803174213030425, "grad_norm": 0.049342532508366344, "learning_rate": 8.019127520091841e-05, "loss": 0.8464, "step": 164060 }, { "epoch": 2.880492986183044, "grad_norm": 0.05063845328365805, "learning_rate": 8.018038064061258e-05, "loss": 0.8413, "step": 164070 }, { "epoch": 2.8806685510630454, "grad_norm": 0.05464963244407542, "learning_rate": 8.016948634409535e-05, "loss": 0.8449, "step": 164080 }, { "epoch": 2.880844115943047, "grad_norm": 0.05716379777508978, "learning_rate": 8.015859231150474e-05, "loss": 0.8444, "step": 164090 }, { "epoch": 2.881019680823048, "grad_norm": 0.06119433129876756, "learning_rate": 8.014769854297873e-05, "loss": 0.8509, "step": 164100 }, { "epoch": 2.8811952457030494, "grad_norm": 0.05181638914098098, "learning_rate": 8.013680503865539e-05, "loss": 0.8444, "step": 164110 }, { "epoch": 2.881370810583051, "grad_norm": 0.0554875437189586, "learning_rate": 8.012591179867272e-05, "loss": 0.8414, "step": 164120 }, { "epoch": 2.8815463754630524, "grad_norm": 0.046560522920289116, "learning_rate": 8.011501882316877e-05, "loss": 0.8388, "step": 164130 }, { "epoch": 2.881721940343054, "grad_norm": 0.054509868678674474, "learning_rate": 8.010412611228148e-05, "loss": 0.8453, "step": 164140 }, { "epoch": 2.8818975052230553, "grad_norm": 0.07593517343067131, "learning_rate": 8.009323366614896e-05, "loss": 0.8505, "step": 164150 }, { "epoch": 2.882073070103057, "grad_norm": 0.09009387524857683, "learning_rate": 8.008234148490918e-05, "loss": 0.8398, "step": 164160 }, { "epoch": 2.882248634983058, "grad_norm": 0.06679378239060768, "learning_rate": 8.007144956870012e-05, "loss": 0.8408, "step": 164170 }, { "epoch": 2.8824241998630593, "grad_norm": 0.050529121174976675, "learning_rate": 8.006055791765983e-05, "loss": 0.8483, "step": 164180 }, { "epoch": 2.882599764743061, "grad_norm": 0.050952368641644485, "learning_rate": 8.004966653192628e-05, "loss": 0.8353, "step": 164190 }, { "epoch": 2.8827753296230623, "grad_norm": 0.050381294388273594, "learning_rate": 8.003877541163744e-05, "loss": 0.8443, "step": 164200 }, { "epoch": 2.8829508945030637, "grad_norm": 0.04964273682025577, "learning_rate": 8.002788455693136e-05, "loss": 0.8455, "step": 164210 }, { "epoch": 2.8831264593830648, "grad_norm": 0.06301153018468897, "learning_rate": 8.001699396794601e-05, "loss": 0.8444, "step": 164220 }, { "epoch": 2.8833020242630663, "grad_norm": 0.04969106786133441, "learning_rate": 8.000610364481936e-05, "loss": 0.8471, "step": 164230 }, { "epoch": 2.8834775891430677, "grad_norm": 0.050907621357371556, "learning_rate": 7.999521358768936e-05, "loss": 0.8385, "step": 164240 }, { "epoch": 2.883653154023069, "grad_norm": 0.11008279542007941, "learning_rate": 7.998432379669407e-05, "loss": 0.8453, "step": 164250 }, { "epoch": 2.8838287189030707, "grad_norm": 0.053294209910177394, "learning_rate": 7.997343427197143e-05, "loss": 0.843, "step": 164260 }, { "epoch": 2.884004283783072, "grad_norm": 0.05525098234896964, "learning_rate": 7.996254501365938e-05, "loss": 0.8443, "step": 164270 }, { "epoch": 2.8841798486630736, "grad_norm": 0.06409245107831955, "learning_rate": 7.995165602189595e-05, "loss": 0.8404, "step": 164280 }, { "epoch": 2.8843554135430747, "grad_norm": 0.04288998488917623, "learning_rate": 7.994076729681907e-05, "loss": 0.8468, "step": 164290 }, { "epoch": 2.884530978423076, "grad_norm": 0.044541377236568376, "learning_rate": 7.992987883856669e-05, "loss": 0.847, "step": 164300 }, { "epoch": 2.8847065433030776, "grad_norm": 0.04797457344656801, "learning_rate": 7.991899064727675e-05, "loss": 0.8401, "step": 164310 }, { "epoch": 2.884882108183079, "grad_norm": 0.04809825096581969, "learning_rate": 7.990810272308726e-05, "loss": 0.8388, "step": 164320 }, { "epoch": 2.8850576730630806, "grad_norm": 0.06618928809774713, "learning_rate": 7.989721506613618e-05, "loss": 0.8439, "step": 164330 }, { "epoch": 2.8852332379430816, "grad_norm": 0.052838659786869364, "learning_rate": 7.98863276765614e-05, "loss": 0.8513, "step": 164340 }, { "epoch": 2.8854088028230835, "grad_norm": 0.052577542515685846, "learning_rate": 7.987544055450091e-05, "loss": 0.8449, "step": 164350 }, { "epoch": 2.8855843677030846, "grad_norm": 0.04481390825164697, "learning_rate": 7.986455370009265e-05, "loss": 0.8399, "step": 164360 }, { "epoch": 2.885759932583086, "grad_norm": 0.04113300278070752, "learning_rate": 7.985366711347451e-05, "loss": 0.8374, "step": 164370 }, { "epoch": 2.8859354974630875, "grad_norm": 0.042794710202876854, "learning_rate": 7.984278079478448e-05, "loss": 0.8432, "step": 164380 }, { "epoch": 2.886111062343089, "grad_norm": 0.06162722694628333, "learning_rate": 7.983189474416048e-05, "loss": 0.838, "step": 164390 }, { "epoch": 2.8862866272230905, "grad_norm": 0.05913161594201383, "learning_rate": 7.982100896174043e-05, "loss": 0.8385, "step": 164400 }, { "epoch": 2.8864621921030915, "grad_norm": 0.052233208898119485, "learning_rate": 7.981012344766222e-05, "loss": 0.8327, "step": 164410 }, { "epoch": 2.886637756983093, "grad_norm": 0.044276303062170626, "learning_rate": 7.979923820206383e-05, "loss": 0.8402, "step": 164420 }, { "epoch": 2.8868133218630945, "grad_norm": 0.052317884569435306, "learning_rate": 7.978835322508316e-05, "loss": 0.8353, "step": 164430 }, { "epoch": 2.886988886743096, "grad_norm": 0.06436234771140761, "learning_rate": 7.977746851685811e-05, "loss": 0.8448, "step": 164440 }, { "epoch": 2.8871644516230974, "grad_norm": 0.048543116616244546, "learning_rate": 7.976658407752661e-05, "loss": 0.8498, "step": 164450 }, { "epoch": 2.8873400165030985, "grad_norm": 0.04556062547954166, "learning_rate": 7.975569990722657e-05, "loss": 0.8423, "step": 164460 }, { "epoch": 2.8875155813831004, "grad_norm": 0.045746954669165556, "learning_rate": 7.974481600609589e-05, "loss": 0.8372, "step": 164470 }, { "epoch": 2.8876911462631014, "grad_norm": 0.06848181654166628, "learning_rate": 7.97339323742724e-05, "loss": 0.8434, "step": 164480 }, { "epoch": 2.887866711143103, "grad_norm": 0.04428072130418775, "learning_rate": 7.972304901189412e-05, "loss": 0.8457, "step": 164490 }, { "epoch": 2.8880422760231044, "grad_norm": 0.0660548780984429, "learning_rate": 7.971216591909889e-05, "loss": 0.8483, "step": 164500 }, { "epoch": 2.888217840903106, "grad_norm": 0.049371849817551695, "learning_rate": 7.970128309602458e-05, "loss": 0.8338, "step": 164510 }, { "epoch": 2.8883934057831073, "grad_norm": 0.057561847424960365, "learning_rate": 7.96904005428091e-05, "loss": 0.8418, "step": 164520 }, { "epoch": 2.8885689706631084, "grad_norm": 0.04417383830317901, "learning_rate": 7.967951825959033e-05, "loss": 0.8446, "step": 164530 }, { "epoch": 2.88874453554311, "grad_norm": 0.04841890504133837, "learning_rate": 7.966863624650615e-05, "loss": 0.8431, "step": 164540 }, { "epoch": 2.8889201004231113, "grad_norm": 0.057932762906252495, "learning_rate": 7.965775450369444e-05, "loss": 0.8431, "step": 164550 }, { "epoch": 2.889095665303113, "grad_norm": 0.04487903632591741, "learning_rate": 7.964687303129305e-05, "loss": 0.8417, "step": 164560 }, { "epoch": 2.8892712301831143, "grad_norm": 0.045707941855346525, "learning_rate": 7.96359918294399e-05, "loss": 0.8387, "step": 164570 }, { "epoch": 2.8894467950631153, "grad_norm": 0.1779684039108651, "learning_rate": 7.96251108982728e-05, "loss": 0.8374, "step": 164580 }, { "epoch": 2.8896223599431172, "grad_norm": 0.06519482879720584, "learning_rate": 7.961423023792965e-05, "loss": 0.8375, "step": 164590 }, { "epoch": 2.8897979248231183, "grad_norm": 0.07128934392250127, "learning_rate": 7.960334984854831e-05, "loss": 0.8442, "step": 164600 }, { "epoch": 2.8899734897031197, "grad_norm": 0.042749673591414114, "learning_rate": 7.959246973026661e-05, "loss": 0.8464, "step": 164610 }, { "epoch": 2.890149054583121, "grad_norm": 0.04893387632849866, "learning_rate": 7.958158988322245e-05, "loss": 0.8295, "step": 164620 }, { "epoch": 2.8903246194631227, "grad_norm": 0.06155614682734781, "learning_rate": 7.957071030755366e-05, "loss": 0.8356, "step": 164630 }, { "epoch": 2.890500184343124, "grad_norm": 0.07078123724694628, "learning_rate": 7.955983100339804e-05, "loss": 0.8317, "step": 164640 }, { "epoch": 2.890675749223125, "grad_norm": 0.06126176235016261, "learning_rate": 7.954895197089345e-05, "loss": 0.8438, "step": 164650 }, { "epoch": 2.8908513141031267, "grad_norm": 0.06533619459505607, "learning_rate": 7.95380732101778e-05, "loss": 0.8383, "step": 164660 }, { "epoch": 2.891026878983128, "grad_norm": 0.05487284975067578, "learning_rate": 7.952719472138886e-05, "loss": 0.8407, "step": 164670 }, { "epoch": 2.8912024438631296, "grad_norm": 0.04537778914844278, "learning_rate": 7.951631650466447e-05, "loss": 0.8461, "step": 164680 }, { "epoch": 2.891378008743131, "grad_norm": 0.04703510405876707, "learning_rate": 7.950543856014248e-05, "loss": 0.8425, "step": 164690 }, { "epoch": 2.8915535736231326, "grad_norm": 0.04825765736566031, "learning_rate": 7.94945608879607e-05, "loss": 0.8354, "step": 164700 }, { "epoch": 2.891729138503134, "grad_norm": 0.04541220782718501, "learning_rate": 7.948368348825696e-05, "loss": 0.8383, "step": 164710 }, { "epoch": 2.891904703383135, "grad_norm": 0.04527739055181864, "learning_rate": 7.947280636116907e-05, "loss": 0.8461, "step": 164720 }, { "epoch": 2.8920802682631366, "grad_norm": 0.049083591717485095, "learning_rate": 7.946192950683485e-05, "loss": 0.8475, "step": 164730 }, { "epoch": 2.892255833143138, "grad_norm": 0.04790645419383023, "learning_rate": 7.945105292539212e-05, "loss": 0.8477, "step": 164740 }, { "epoch": 2.8924313980231395, "grad_norm": 0.061216821428596045, "learning_rate": 7.944017661697862e-05, "loss": 0.8341, "step": 164750 }, { "epoch": 2.892606962903141, "grad_norm": 0.0638818083442616, "learning_rate": 7.942930058173226e-05, "loss": 0.8373, "step": 164760 }, { "epoch": 2.892782527783142, "grad_norm": 0.047463435283314204, "learning_rate": 7.941842481979081e-05, "loss": 0.844, "step": 164770 }, { "epoch": 2.8929580926631435, "grad_norm": 0.0751329154988648, "learning_rate": 7.940754933129204e-05, "loss": 0.8386, "step": 164780 }, { "epoch": 2.893133657543145, "grad_norm": 0.05049052898937298, "learning_rate": 7.939667411637375e-05, "loss": 0.847, "step": 164790 }, { "epoch": 2.8933092224231465, "grad_norm": 0.047994846908921474, "learning_rate": 7.938579917517376e-05, "loss": 0.8427, "step": 164800 }, { "epoch": 2.893484787303148, "grad_norm": 0.07289777191896038, "learning_rate": 7.937492450782985e-05, "loss": 0.8428, "step": 164810 }, { "epoch": 2.8936603521831494, "grad_norm": 0.05601936831815173, "learning_rate": 7.93640501144797e-05, "loss": 0.84, "step": 164820 }, { "epoch": 2.893835917063151, "grad_norm": 0.045425698523703845, "learning_rate": 7.935317599526126e-05, "loss": 0.8432, "step": 164830 }, { "epoch": 2.894011481943152, "grad_norm": 0.04651845258407664, "learning_rate": 7.934230215031223e-05, "loss": 0.8414, "step": 164840 }, { "epoch": 2.8941870468231534, "grad_norm": 0.04728872284449045, "learning_rate": 7.933142857977035e-05, "loss": 0.8504, "step": 164850 }, { "epoch": 2.894362611703155, "grad_norm": 0.05601568586353126, "learning_rate": 7.932055528377345e-05, "loss": 0.8451, "step": 164860 }, { "epoch": 2.8945381765831564, "grad_norm": 0.054008424216360026, "learning_rate": 7.930968226245927e-05, "loss": 0.8407, "step": 164870 }, { "epoch": 2.894713741463158, "grad_norm": 0.0532644645711892, "learning_rate": 7.929880951596554e-05, "loss": 0.8432, "step": 164880 }, { "epoch": 2.894889306343159, "grad_norm": 0.0507627907228056, "learning_rate": 7.928793704443009e-05, "loss": 0.8508, "step": 164890 }, { "epoch": 2.8950648712231604, "grad_norm": 0.053066995405196854, "learning_rate": 7.927706484799062e-05, "loss": 0.8379, "step": 164900 }, { "epoch": 2.895240436103162, "grad_norm": 0.06185318189538912, "learning_rate": 7.926619292678492e-05, "loss": 0.8389, "step": 164910 }, { "epoch": 2.8954160009831633, "grad_norm": 0.047291741811332375, "learning_rate": 7.925532128095067e-05, "loss": 0.8398, "step": 164920 }, { "epoch": 2.895591565863165, "grad_norm": 0.04449707001193786, "learning_rate": 7.924444991062571e-05, "loss": 0.8396, "step": 164930 }, { "epoch": 2.8957671307431663, "grad_norm": 0.058939464900428264, "learning_rate": 7.923357881594775e-05, "loss": 0.8431, "step": 164940 }, { "epoch": 2.8959426956231678, "grad_norm": 0.054200014118717915, "learning_rate": 7.922270799705448e-05, "loss": 0.8418, "step": 164950 }, { "epoch": 2.896118260503169, "grad_norm": 0.05837151621951188, "learning_rate": 7.92118374540837e-05, "loss": 0.8412, "step": 164960 }, { "epoch": 2.8962938253831703, "grad_norm": 0.06107232668243562, "learning_rate": 7.920096718717311e-05, "loss": 0.8521, "step": 164970 }, { "epoch": 2.8964693902631717, "grad_norm": 0.048827021047823396, "learning_rate": 7.919009719646045e-05, "loss": 0.8323, "step": 164980 }, { "epoch": 2.896644955143173, "grad_norm": 0.05161919529471505, "learning_rate": 7.917922748208339e-05, "loss": 0.843, "step": 164990 }, { "epoch": 2.8968205200231747, "grad_norm": 0.06033225716024112, "learning_rate": 7.916835804417975e-05, "loss": 0.8486, "step": 165000 }, { "epoch": 2.8969960849031757, "grad_norm": 0.05739356371418361, "learning_rate": 7.91574888828872e-05, "loss": 0.8508, "step": 165010 }, { "epoch": 2.8971716497831776, "grad_norm": 0.0597487299890794, "learning_rate": 7.914661999834341e-05, "loss": 0.8369, "step": 165020 }, { "epoch": 2.8973472146631787, "grad_norm": 0.05735902193027662, "learning_rate": 7.913575139068618e-05, "loss": 0.8468, "step": 165030 }, { "epoch": 2.89752277954318, "grad_norm": 0.04900408101297695, "learning_rate": 7.912488306005315e-05, "loss": 0.8473, "step": 165040 }, { "epoch": 2.8976983444231816, "grad_norm": 0.05548607137323572, "learning_rate": 7.911401500658204e-05, "loss": 0.8378, "step": 165050 }, { "epoch": 2.897873909303183, "grad_norm": 0.05598183534115939, "learning_rate": 7.910314723041058e-05, "loss": 0.8357, "step": 165060 }, { "epoch": 2.8980494741831846, "grad_norm": 0.047376233464168076, "learning_rate": 7.909227973167642e-05, "loss": 0.8385, "step": 165070 }, { "epoch": 2.8982250390631856, "grad_norm": 0.06127089571989709, "learning_rate": 7.908141251051728e-05, "loss": 0.842, "step": 165080 }, { "epoch": 2.898400603943187, "grad_norm": 0.05207108200068786, "learning_rate": 7.907054556707082e-05, "loss": 0.8389, "step": 165090 }, { "epoch": 2.8985761688231886, "grad_norm": 0.05810174549344937, "learning_rate": 7.905967890147478e-05, "loss": 0.8392, "step": 165100 }, { "epoch": 2.89875173370319, "grad_norm": 0.05308136493040261, "learning_rate": 7.90488125138668e-05, "loss": 0.8461, "step": 165110 }, { "epoch": 2.8989272985831915, "grad_norm": 0.05499762315284451, "learning_rate": 7.903794640438457e-05, "loss": 0.8418, "step": 165120 }, { "epoch": 2.8991028634631926, "grad_norm": 0.07884283117068985, "learning_rate": 7.902708057316578e-05, "loss": 0.8413, "step": 165130 }, { "epoch": 2.8992784283431945, "grad_norm": 0.06470154533631292, "learning_rate": 7.901621502034809e-05, "loss": 0.8444, "step": 165140 }, { "epoch": 2.8994539932231955, "grad_norm": 0.05180766531293872, "learning_rate": 7.900534974606916e-05, "loss": 0.838, "step": 165150 }, { "epoch": 2.899629558103197, "grad_norm": 0.06182557993704867, "learning_rate": 7.899448475046664e-05, "loss": 0.8378, "step": 165160 }, { "epoch": 2.8998051229831985, "grad_norm": 0.04653752642248593, "learning_rate": 7.898362003367825e-05, "loss": 0.8443, "step": 165170 }, { "epoch": 2.8999806878632, "grad_norm": 0.05409006350120545, "learning_rate": 7.897275559584161e-05, "loss": 0.8381, "step": 165180 }, { "epoch": 2.9001562527432014, "grad_norm": 0.052743204096247595, "learning_rate": 7.896189143709437e-05, "loss": 0.8412, "step": 165190 }, { "epoch": 2.9003318176232025, "grad_norm": 0.04892632007477496, "learning_rate": 7.895102755757418e-05, "loss": 0.836, "step": 165200 }, { "epoch": 2.900507382503204, "grad_norm": 0.07529828766102573, "learning_rate": 7.894016395741873e-05, "loss": 0.8383, "step": 165210 }, { "epoch": 2.9006829473832054, "grad_norm": 0.06498285067366927, "learning_rate": 7.892930063676561e-05, "loss": 0.8377, "step": 165220 }, { "epoch": 2.900858512263207, "grad_norm": 0.05387818699544917, "learning_rate": 7.891843759575249e-05, "loss": 0.8486, "step": 165230 }, { "epoch": 2.9010340771432084, "grad_norm": 0.06230406054209931, "learning_rate": 7.890757483451699e-05, "loss": 0.8428, "step": 165240 }, { "epoch": 2.9012096420232094, "grad_norm": 0.06493074229677168, "learning_rate": 7.889671235319676e-05, "loss": 0.8401, "step": 165250 }, { "epoch": 2.9013852069032113, "grad_norm": 0.04902074329873495, "learning_rate": 7.888585015192937e-05, "loss": 0.8494, "step": 165260 }, { "epoch": 2.9015607717832124, "grad_norm": 0.061237183083524556, "learning_rate": 7.887498823085254e-05, "loss": 0.8438, "step": 165270 }, { "epoch": 2.901736336663214, "grad_norm": 0.06198898877944065, "learning_rate": 7.886412659010386e-05, "loss": 0.8428, "step": 165280 }, { "epoch": 2.9019119015432153, "grad_norm": 0.04848012839689154, "learning_rate": 7.885326522982092e-05, "loss": 0.8433, "step": 165290 }, { "epoch": 2.902087466423217, "grad_norm": 0.061282570575668896, "learning_rate": 7.884240415014136e-05, "loss": 0.8438, "step": 165300 }, { "epoch": 2.9022630313032183, "grad_norm": 0.04579474890976249, "learning_rate": 7.88315433512028e-05, "loss": 0.8445, "step": 165310 }, { "epoch": 2.9024385961832193, "grad_norm": 0.0872503554614973, "learning_rate": 7.88206828331428e-05, "loss": 0.8456, "step": 165320 }, { "epoch": 2.902614161063221, "grad_norm": 0.05990792300212447, "learning_rate": 7.880982259609901e-05, "loss": 0.844, "step": 165330 }, { "epoch": 2.9027897259432223, "grad_norm": 0.054948028245571885, "learning_rate": 7.879896264020903e-05, "loss": 0.841, "step": 165340 }, { "epoch": 2.9029652908232237, "grad_norm": 0.04383429433797701, "learning_rate": 7.878810296561045e-05, "loss": 0.8436, "step": 165350 }, { "epoch": 2.903140855703225, "grad_norm": 0.049726534044412844, "learning_rate": 7.877724357244084e-05, "loss": 0.8368, "step": 165360 }, { "epoch": 2.9033164205832263, "grad_norm": 0.06542284634239183, "learning_rate": 7.876638446083784e-05, "loss": 0.8402, "step": 165370 }, { "epoch": 2.903491985463228, "grad_norm": 0.05349988315403465, "learning_rate": 7.8755525630939e-05, "loss": 0.8542, "step": 165380 }, { "epoch": 2.903667550343229, "grad_norm": 0.04880275663184911, "learning_rate": 7.874466708288191e-05, "loss": 0.8429, "step": 165390 }, { "epoch": 2.9038431152232307, "grad_norm": 0.058924372443803025, "learning_rate": 7.873380881680416e-05, "loss": 0.8376, "step": 165400 }, { "epoch": 2.904018680103232, "grad_norm": 0.06039927239169922, "learning_rate": 7.872295083284334e-05, "loss": 0.8456, "step": 165410 }, { "epoch": 2.9041942449832336, "grad_norm": 0.046543890782821125, "learning_rate": 7.871209313113699e-05, "loss": 0.8444, "step": 165420 }, { "epoch": 2.904369809863235, "grad_norm": 0.05398777329956209, "learning_rate": 7.870123571182264e-05, "loss": 0.8413, "step": 165430 }, { "epoch": 2.904545374743236, "grad_norm": 0.05696068515737202, "learning_rate": 7.869037857503796e-05, "loss": 0.8394, "step": 165440 }, { "epoch": 2.9047209396232376, "grad_norm": 0.046054667280499326, "learning_rate": 7.867952172092045e-05, "loss": 0.8462, "step": 165450 }, { "epoch": 2.904896504503239, "grad_norm": 0.04569259050513103, "learning_rate": 7.866866514960766e-05, "loss": 0.8439, "step": 165460 }, { "epoch": 2.9050720693832406, "grad_norm": 0.0524147587428973, "learning_rate": 7.865780886123719e-05, "loss": 0.8251, "step": 165470 }, { "epoch": 2.905247634263242, "grad_norm": 0.042754109970196075, "learning_rate": 7.864695285594655e-05, "loss": 0.8406, "step": 165480 }, { "epoch": 2.9054231991432435, "grad_norm": 0.04896570510736805, "learning_rate": 7.863609713387332e-05, "loss": 0.8362, "step": 165490 }, { "epoch": 2.905598764023245, "grad_norm": 0.044885225661702076, "learning_rate": 7.862524169515497e-05, "loss": 0.8363, "step": 165500 }, { "epoch": 2.905774328903246, "grad_norm": 0.056392219419541215, "learning_rate": 7.861438653992914e-05, "loss": 0.852, "step": 165510 }, { "epoch": 2.9059498937832475, "grad_norm": 0.037288573692969375, "learning_rate": 7.860353166833333e-05, "loss": 0.8431, "step": 165520 }, { "epoch": 2.906125458663249, "grad_norm": 0.052324638957516684, "learning_rate": 7.859267708050503e-05, "loss": 0.8471, "step": 165530 }, { "epoch": 2.9063010235432505, "grad_norm": 0.05192738990188919, "learning_rate": 7.858182277658184e-05, "loss": 0.8534, "step": 165540 }, { "epoch": 2.906476588423252, "grad_norm": 0.04237851082354374, "learning_rate": 7.857096875670123e-05, "loss": 0.8378, "step": 165550 }, { "epoch": 2.906652153303253, "grad_norm": 0.04767650915085455, "learning_rate": 7.856011502100075e-05, "loss": 0.8432, "step": 165560 }, { "epoch": 2.9068277181832545, "grad_norm": 0.05300932904915909, "learning_rate": 7.854926156961793e-05, "loss": 0.8421, "step": 165570 }, { "epoch": 2.907003283063256, "grad_norm": 0.06475118633688669, "learning_rate": 7.853840840269027e-05, "loss": 0.8375, "step": 165580 }, { "epoch": 2.9071788479432574, "grad_norm": 0.04989486812906141, "learning_rate": 7.852755552035526e-05, "loss": 0.8437, "step": 165590 }, { "epoch": 2.907354412823259, "grad_norm": 0.061831890170899816, "learning_rate": 7.851670292275041e-05, "loss": 0.846, "step": 165600 }, { "epoch": 2.9075299777032604, "grad_norm": 0.05452765804967601, "learning_rate": 7.850585061001327e-05, "loss": 0.8395, "step": 165610 }, { "epoch": 2.907705542583262, "grad_norm": 0.05869293199194346, "learning_rate": 7.849499858228131e-05, "loss": 0.8447, "step": 165620 }, { "epoch": 2.907881107463263, "grad_norm": 0.06832213540372248, "learning_rate": 7.848414683969205e-05, "loss": 0.8448, "step": 165630 }, { "epoch": 2.9080566723432644, "grad_norm": 0.06248404787433853, "learning_rate": 7.847329538238293e-05, "loss": 0.8428, "step": 165640 }, { "epoch": 2.908232237223266, "grad_norm": 0.06917811180229637, "learning_rate": 7.84624442104915e-05, "loss": 0.8414, "step": 165650 }, { "epoch": 2.9084078021032673, "grad_norm": 0.07265304295679542, "learning_rate": 7.845159332415524e-05, "loss": 0.8408, "step": 165660 }, { "epoch": 2.908583366983269, "grad_norm": 0.04973933192077872, "learning_rate": 7.844074272351155e-05, "loss": 0.8403, "step": 165670 }, { "epoch": 2.90875893186327, "grad_norm": 0.05226555193730388, "learning_rate": 7.842989240869801e-05, "loss": 0.8467, "step": 165680 }, { "epoch": 2.9089344967432713, "grad_norm": 0.05395302213528715, "learning_rate": 7.841904237985206e-05, "loss": 0.8427, "step": 165690 }, { "epoch": 2.909110061623273, "grad_norm": 0.04368203364067192, "learning_rate": 7.840819263711116e-05, "loss": 0.8461, "step": 165700 }, { "epoch": 2.9092856265032743, "grad_norm": 0.04726870642649374, "learning_rate": 7.839734318061281e-05, "loss": 0.8446, "step": 165710 }, { "epoch": 2.9094611913832757, "grad_norm": 0.047443523137579394, "learning_rate": 7.838649401049444e-05, "loss": 0.8491, "step": 165720 }, { "epoch": 2.9096367562632772, "grad_norm": 0.05771561329891542, "learning_rate": 7.837564512689351e-05, "loss": 0.8401, "step": 165730 }, { "epoch": 2.9098123211432787, "grad_norm": 0.04635264701108506, "learning_rate": 7.836479652994752e-05, "loss": 0.8396, "step": 165740 }, { "epoch": 2.9099878860232797, "grad_norm": 0.04792438454035386, "learning_rate": 7.835394821979389e-05, "loss": 0.8398, "step": 165750 }, { "epoch": 2.910163450903281, "grad_norm": 0.0503070454850688, "learning_rate": 7.83431001965701e-05, "loss": 0.8533, "step": 165760 }, { "epoch": 2.9103390157832827, "grad_norm": 0.04593015625466987, "learning_rate": 7.833225246041349e-05, "loss": 0.8426, "step": 165770 }, { "epoch": 2.910514580663284, "grad_norm": 0.07494906546536324, "learning_rate": 7.832140501146166e-05, "loss": 0.8512, "step": 165780 }, { "epoch": 2.9106901455432856, "grad_norm": 0.06921750099233032, "learning_rate": 7.831055784985195e-05, "loss": 0.8331, "step": 165790 }, { "epoch": 2.9108657104232867, "grad_norm": 0.053961657216221544, "learning_rate": 7.829971097572182e-05, "loss": 0.8378, "step": 165800 }, { "epoch": 2.9110412753032886, "grad_norm": 0.06649835885607418, "learning_rate": 7.828886438920872e-05, "loss": 0.841, "step": 165810 }, { "epoch": 2.9112168401832896, "grad_norm": 0.05172761002991576, "learning_rate": 7.827801809045005e-05, "loss": 0.8397, "step": 165820 }, { "epoch": 2.911392405063291, "grad_norm": 0.04543963249207889, "learning_rate": 7.826717207958325e-05, "loss": 0.8375, "step": 165830 }, { "epoch": 2.9115679699432926, "grad_norm": 0.05095610716050716, "learning_rate": 7.82563263567457e-05, "loss": 0.8427, "step": 165840 }, { "epoch": 2.911743534823294, "grad_norm": 0.05273616924121215, "learning_rate": 7.824548092207489e-05, "loss": 0.8481, "step": 165850 }, { "epoch": 2.9119190997032955, "grad_norm": 0.055370374817857254, "learning_rate": 7.823463577570819e-05, "loss": 0.8406, "step": 165860 }, { "epoch": 2.9120946645832966, "grad_norm": 0.043660072190548016, "learning_rate": 7.822379091778301e-05, "loss": 0.8488, "step": 165870 }, { "epoch": 2.912270229463298, "grad_norm": 0.0652923709862853, "learning_rate": 7.821294634843678e-05, "loss": 0.8363, "step": 165880 }, { "epoch": 2.9124457943432995, "grad_norm": 0.06107177071902202, "learning_rate": 7.820210206780689e-05, "loss": 0.8437, "step": 165890 }, { "epoch": 2.912621359223301, "grad_norm": 0.0627301351450824, "learning_rate": 7.819125807603072e-05, "loss": 0.8361, "step": 165900 }, { "epoch": 2.9127969241033025, "grad_norm": 0.04557597049071098, "learning_rate": 7.818041437324568e-05, "loss": 0.8434, "step": 165910 }, { "epoch": 2.9129724889833035, "grad_norm": 0.05133549025355707, "learning_rate": 7.81695709595892e-05, "loss": 0.8424, "step": 165920 }, { "epoch": 2.9131480538633054, "grad_norm": 0.05197887747822707, "learning_rate": 7.81587278351986e-05, "loss": 0.8417, "step": 165930 }, { "epoch": 2.9133236187433065, "grad_norm": 0.05435068826125067, "learning_rate": 7.814788500021127e-05, "loss": 0.8409, "step": 165940 }, { "epoch": 2.913499183623308, "grad_norm": 0.1021368473580131, "learning_rate": 7.813704245476464e-05, "loss": 0.8468, "step": 165950 }, { "epoch": 2.9136747485033094, "grad_norm": 0.054436145222125004, "learning_rate": 7.812620019899608e-05, "loss": 0.8447, "step": 165960 }, { "epoch": 2.913850313383311, "grad_norm": 0.05270005599282899, "learning_rate": 7.811535823304293e-05, "loss": 0.8424, "step": 165970 }, { "epoch": 2.9140258782633124, "grad_norm": 0.05959146098223891, "learning_rate": 7.810451655704258e-05, "loss": 0.8459, "step": 165980 }, { "epoch": 2.9142014431433134, "grad_norm": 0.04508258371363016, "learning_rate": 7.809367517113241e-05, "loss": 0.8406, "step": 165990 }, { "epoch": 2.914377008023315, "grad_norm": 0.0727531640824531, "learning_rate": 7.808283407544975e-05, "loss": 0.8441, "step": 166000 }, { "epoch": 2.9145525729033164, "grad_norm": 0.058758635191030834, "learning_rate": 7.807199327013193e-05, "loss": 0.8458, "step": 166010 }, { "epoch": 2.914728137783318, "grad_norm": 0.0483319645919758, "learning_rate": 7.806115275531641e-05, "loss": 0.8396, "step": 166020 }, { "epoch": 2.9149037026633193, "grad_norm": 0.039960011856797865, "learning_rate": 7.805031253114048e-05, "loss": 0.8444, "step": 166030 }, { "epoch": 2.9150792675433204, "grad_norm": 0.06125641982138102, "learning_rate": 7.803947259774146e-05, "loss": 0.8505, "step": 166040 }, { "epoch": 2.9152548324233223, "grad_norm": 0.057504935479887685, "learning_rate": 7.802863295525674e-05, "loss": 0.8397, "step": 166050 }, { "epoch": 2.9154303973033233, "grad_norm": 0.04777985684367588, "learning_rate": 7.801779360382366e-05, "loss": 0.8417, "step": 166060 }, { "epoch": 2.915605962183325, "grad_norm": 0.04840367022976688, "learning_rate": 7.800695454357949e-05, "loss": 0.8264, "step": 166070 }, { "epoch": 2.9157815270633263, "grad_norm": 0.04174615101537317, "learning_rate": 7.799611577466164e-05, "loss": 0.8502, "step": 166080 }, { "epoch": 2.9159570919433278, "grad_norm": 0.06330348248941466, "learning_rate": 7.798527729720743e-05, "loss": 0.849, "step": 166090 }, { "epoch": 2.9161326568233292, "grad_norm": 0.052422612086748484, "learning_rate": 7.797443911135417e-05, "loss": 0.8413, "step": 166100 }, { "epoch": 2.9163082217033303, "grad_norm": 0.0487058399948113, "learning_rate": 7.796360121723912e-05, "loss": 0.8419, "step": 166110 }, { "epoch": 2.9164837865833317, "grad_norm": 0.050827726709818534, "learning_rate": 7.79527636149997e-05, "loss": 0.8465, "step": 166120 }, { "epoch": 2.916659351463333, "grad_norm": 0.05868166042161775, "learning_rate": 7.794192630477318e-05, "loss": 0.8505, "step": 166130 }, { "epoch": 2.9168349163433347, "grad_norm": 0.07103298814228204, "learning_rate": 7.793108928669686e-05, "loss": 0.8493, "step": 166140 }, { "epoch": 2.917010481223336, "grad_norm": 0.047186532602645416, "learning_rate": 7.792025256090809e-05, "loss": 0.8418, "step": 166150 }, { "epoch": 2.9171860461033376, "grad_norm": 0.0483025508335643, "learning_rate": 7.790941612754413e-05, "loss": 0.8377, "step": 166160 }, { "epoch": 2.917361610983339, "grad_norm": 0.050754332218129784, "learning_rate": 7.78985799867423e-05, "loss": 0.8422, "step": 166170 }, { "epoch": 2.91753717586334, "grad_norm": 0.05738464852893005, "learning_rate": 7.788774413863985e-05, "loss": 0.8357, "step": 166180 }, { "epoch": 2.9177127407433416, "grad_norm": 0.06868846570291848, "learning_rate": 7.787690858337414e-05, "loss": 0.8486, "step": 166190 }, { "epoch": 2.917888305623343, "grad_norm": 0.049933501666357474, "learning_rate": 7.786607332108243e-05, "loss": 0.8445, "step": 166200 }, { "epoch": 2.9180638705033446, "grad_norm": 0.04403981815529309, "learning_rate": 7.7855238351902e-05, "loss": 0.8469, "step": 166210 }, { "epoch": 2.918239435383346, "grad_norm": 0.07208491033598806, "learning_rate": 7.784440367597014e-05, "loss": 0.8425, "step": 166220 }, { "epoch": 2.918415000263347, "grad_norm": 0.05846288186459382, "learning_rate": 7.783356929342413e-05, "loss": 0.8395, "step": 166230 }, { "epoch": 2.9185905651433486, "grad_norm": 0.04911420198667091, "learning_rate": 7.782273520440123e-05, "loss": 0.8476, "step": 166240 }, { "epoch": 2.91876613002335, "grad_norm": 0.048189081147507314, "learning_rate": 7.781190140903871e-05, "loss": 0.8478, "step": 166250 }, { "epoch": 2.9189416949033515, "grad_norm": 0.04810481457151729, "learning_rate": 7.780106790747386e-05, "loss": 0.8487, "step": 166260 }, { "epoch": 2.919117259783353, "grad_norm": 0.052769526215993415, "learning_rate": 7.779023469984392e-05, "loss": 0.845, "step": 166270 }, { "epoch": 2.9192928246633545, "grad_norm": 0.0632756804635532, "learning_rate": 7.777940178628611e-05, "loss": 0.8424, "step": 166280 }, { "epoch": 2.919468389543356, "grad_norm": 0.06497860831767845, "learning_rate": 7.776856916693777e-05, "loss": 0.8387, "step": 166290 }, { "epoch": 2.919643954423357, "grad_norm": 0.05688427138092298, "learning_rate": 7.775773684193612e-05, "loss": 0.8464, "step": 166300 }, { "epoch": 2.9198195193033585, "grad_norm": 0.05641670204015309, "learning_rate": 7.774690481141839e-05, "loss": 0.8431, "step": 166310 }, { "epoch": 2.91999508418336, "grad_norm": 0.048705652882684705, "learning_rate": 7.773607307552183e-05, "loss": 0.8413, "step": 166320 }, { "epoch": 2.9201706490633614, "grad_norm": 0.07836644853790159, "learning_rate": 7.772524163438369e-05, "loss": 0.8459, "step": 166330 }, { "epoch": 2.920346213943363, "grad_norm": 0.056259123268985294, "learning_rate": 7.77144104881412e-05, "loss": 0.8363, "step": 166340 }, { "epoch": 2.920521778823364, "grad_norm": 0.050450250684245766, "learning_rate": 7.770357963693155e-05, "loss": 0.8419, "step": 166350 }, { "epoch": 2.9206973437033654, "grad_norm": 0.05907650952413917, "learning_rate": 7.769274908089206e-05, "loss": 0.8441, "step": 166360 }, { "epoch": 2.920872908583367, "grad_norm": 0.05909657468510653, "learning_rate": 7.76819188201599e-05, "loss": 0.835, "step": 166370 }, { "epoch": 2.9210484734633684, "grad_norm": 0.05551637686590776, "learning_rate": 7.767108885487227e-05, "loss": 0.8408, "step": 166380 }, { "epoch": 2.92122403834337, "grad_norm": 0.06469456838254341, "learning_rate": 7.766025918516644e-05, "loss": 0.842, "step": 166390 }, { "epoch": 2.9213996032233713, "grad_norm": 0.0846292278031828, "learning_rate": 7.76494298111796e-05, "loss": 0.8484, "step": 166400 }, { "epoch": 2.921575168103373, "grad_norm": 0.05668584676318539, "learning_rate": 7.763860073304895e-05, "loss": 0.8431, "step": 166410 }, { "epoch": 2.921750732983374, "grad_norm": 0.0515852774591227, "learning_rate": 7.762777195091172e-05, "loss": 0.8446, "step": 166420 }, { "epoch": 2.9219262978633753, "grad_norm": 0.060572032865394025, "learning_rate": 7.761694346490507e-05, "loss": 0.8446, "step": 166430 }, { "epoch": 2.922101862743377, "grad_norm": 0.05912812394225451, "learning_rate": 7.760611527516626e-05, "loss": 0.8345, "step": 166440 }, { "epoch": 2.9222774276233783, "grad_norm": 0.05604543423936016, "learning_rate": 7.759528738183241e-05, "loss": 0.8382, "step": 166450 }, { "epoch": 2.9224529925033798, "grad_norm": 0.05040745375908166, "learning_rate": 7.758445978504079e-05, "loss": 0.8485, "step": 166460 }, { "epoch": 2.922628557383381, "grad_norm": 0.07513519436534286, "learning_rate": 7.757363248492855e-05, "loss": 0.8446, "step": 166470 }, { "epoch": 2.9228041222633827, "grad_norm": 0.05736943771844863, "learning_rate": 7.756280548163285e-05, "loss": 0.848, "step": 166480 }, { "epoch": 2.9229796871433837, "grad_norm": 0.04710239753109713, "learning_rate": 7.755197877529094e-05, "loss": 0.8312, "step": 166490 }, { "epoch": 2.923155252023385, "grad_norm": 0.04653874204967587, "learning_rate": 7.754115236603992e-05, "loss": 0.8419, "step": 166500 }, { "epoch": 2.9233308169033867, "grad_norm": 0.03955193040174793, "learning_rate": 7.753032625401702e-05, "loss": 0.8501, "step": 166510 }, { "epoch": 2.923506381783388, "grad_norm": 0.049023372795033465, "learning_rate": 7.751950043935933e-05, "loss": 0.8412, "step": 166520 }, { "epoch": 2.9236819466633897, "grad_norm": 0.051459062197955124, "learning_rate": 7.750867492220411e-05, "loss": 0.8442, "step": 166530 }, { "epoch": 2.9238575115433907, "grad_norm": 0.0485287147939274, "learning_rate": 7.749784970268848e-05, "loss": 0.8502, "step": 166540 }, { "epoch": 2.924033076423392, "grad_norm": 0.04345010354523265, "learning_rate": 7.748702478094959e-05, "loss": 0.8492, "step": 166550 }, { "epoch": 2.9242086413033936, "grad_norm": 0.06657553596398272, "learning_rate": 7.747620015712461e-05, "loss": 0.847, "step": 166560 }, { "epoch": 2.924384206183395, "grad_norm": 0.059315573667107824, "learning_rate": 7.746537583135069e-05, "loss": 0.8407, "step": 166570 }, { "epoch": 2.9245597710633966, "grad_norm": 0.060749478477166066, "learning_rate": 7.745455180376495e-05, "loss": 0.8421, "step": 166580 }, { "epoch": 2.9247353359433976, "grad_norm": 0.06506337191750973, "learning_rate": 7.744372807450457e-05, "loss": 0.8474, "step": 166590 }, { "epoch": 2.9249109008233996, "grad_norm": 0.04640862234833421, "learning_rate": 7.743290464370668e-05, "loss": 0.8417, "step": 166600 }, { "epoch": 2.9250864657034006, "grad_norm": 0.062418411281024884, "learning_rate": 7.742208151150838e-05, "loss": 0.8345, "step": 166610 }, { "epoch": 2.925262030583402, "grad_norm": 0.05400079145610111, "learning_rate": 7.74112586780468e-05, "loss": 0.8497, "step": 166620 }, { "epoch": 2.9254375954634035, "grad_norm": 0.06213927992558956, "learning_rate": 7.740043614345913e-05, "loss": 0.8409, "step": 166630 }, { "epoch": 2.925613160343405, "grad_norm": 0.05906238946124199, "learning_rate": 7.738961390788247e-05, "loss": 0.8427, "step": 166640 }, { "epoch": 2.9257887252234065, "grad_norm": 0.05148532741739543, "learning_rate": 7.737879197145389e-05, "loss": 0.8486, "step": 166650 }, { "epoch": 2.9259642901034075, "grad_norm": 0.04730806105614654, "learning_rate": 7.736797033431058e-05, "loss": 0.8415, "step": 166660 }, { "epoch": 2.926139854983409, "grad_norm": 0.0607120728418609, "learning_rate": 7.735714899658959e-05, "loss": 0.8454, "step": 166670 }, { "epoch": 2.9263154198634105, "grad_norm": 0.0513334301788394, "learning_rate": 7.734632795842806e-05, "loss": 0.8409, "step": 166680 }, { "epoch": 2.926490984743412, "grad_norm": 0.042953905908412554, "learning_rate": 7.733550721996307e-05, "loss": 0.8404, "step": 166690 }, { "epoch": 2.9266665496234134, "grad_norm": 0.04763087114728016, "learning_rate": 7.732468678133176e-05, "loss": 0.8446, "step": 166700 }, { "epoch": 2.9268421145034145, "grad_norm": 0.051061153826170894, "learning_rate": 7.731386664267121e-05, "loss": 0.85, "step": 166710 }, { "epoch": 2.9270176793834164, "grad_norm": 0.058634711522613375, "learning_rate": 7.730304680411848e-05, "loss": 0.8438, "step": 166720 }, { "epoch": 2.9271932442634174, "grad_norm": 0.05197016540511245, "learning_rate": 7.729222726581072e-05, "loss": 0.8403, "step": 166730 }, { "epoch": 2.927368809143419, "grad_norm": 0.051707658920558244, "learning_rate": 7.728140802788499e-05, "loss": 0.8515, "step": 166740 }, { "epoch": 2.9275443740234204, "grad_norm": 0.043830458310189044, "learning_rate": 7.727058909047834e-05, "loss": 0.8463, "step": 166750 }, { "epoch": 2.927719938903422, "grad_norm": 0.07002949592372561, "learning_rate": 7.725977045372788e-05, "loss": 0.8413, "step": 166760 }, { "epoch": 2.9278955037834233, "grad_norm": 0.047407768386804744, "learning_rate": 7.72489521177707e-05, "loss": 0.8385, "step": 166770 }, { "epoch": 2.9280710686634244, "grad_norm": 0.0460042786531674, "learning_rate": 7.723813408274383e-05, "loss": 0.8399, "step": 166780 }, { "epoch": 2.928246633543426, "grad_norm": 0.058379525170610624, "learning_rate": 7.722731634878432e-05, "loss": 0.8425, "step": 166790 }, { "epoch": 2.9284221984234273, "grad_norm": 0.047737995343531514, "learning_rate": 7.721649891602931e-05, "loss": 0.8416, "step": 166800 }, { "epoch": 2.928597763303429, "grad_norm": 0.045241707418165446, "learning_rate": 7.720568178461583e-05, "loss": 0.8429, "step": 166810 }, { "epoch": 2.9287733281834303, "grad_norm": 0.051490168222875554, "learning_rate": 7.719486495468088e-05, "loss": 0.8419, "step": 166820 }, { "epoch": 2.9289488930634318, "grad_norm": 0.04828833016454676, "learning_rate": 7.718404842636159e-05, "loss": 0.8402, "step": 166830 }, { "epoch": 2.9291244579434332, "grad_norm": 0.06240246129451937, "learning_rate": 7.717323219979497e-05, "loss": 0.8404, "step": 166840 }, { "epoch": 2.9293000228234343, "grad_norm": 0.05862251942122953, "learning_rate": 7.716241627511806e-05, "loss": 0.8422, "step": 166850 }, { "epoch": 2.9294755877034357, "grad_norm": 0.08241112629659283, "learning_rate": 7.715160065246783e-05, "loss": 0.8345, "step": 166860 }, { "epoch": 2.9296511525834372, "grad_norm": 0.07896378187248665, "learning_rate": 7.714078533198148e-05, "loss": 0.8381, "step": 166870 }, { "epoch": 2.9298267174634387, "grad_norm": 0.0657651100751414, "learning_rate": 7.712997031379595e-05, "loss": 0.8372, "step": 166880 }, { "epoch": 2.93000228234344, "grad_norm": 0.057413882201555265, "learning_rate": 7.711915559804822e-05, "loss": 0.8434, "step": 166890 }, { "epoch": 2.930177847223441, "grad_norm": 0.05328435909952621, "learning_rate": 7.71083411848754e-05, "loss": 0.8469, "step": 166900 }, { "epoch": 2.9303534121034427, "grad_norm": 0.047516414734544904, "learning_rate": 7.709752707441449e-05, "loss": 0.8429, "step": 166910 }, { "epoch": 2.930528976983444, "grad_norm": 0.0710805986910066, "learning_rate": 7.708671326680246e-05, "loss": 0.8507, "step": 166920 }, { "epoch": 2.9307045418634456, "grad_norm": 0.062334487088238154, "learning_rate": 7.707589976217638e-05, "loss": 0.847, "step": 166930 }, { "epoch": 2.930880106743447, "grad_norm": 0.04636308537665379, "learning_rate": 7.706508656067322e-05, "loss": 0.8457, "step": 166940 }, { "epoch": 2.9310556716234486, "grad_norm": 0.05185100728605853, "learning_rate": 7.705427366243001e-05, "loss": 0.8419, "step": 166950 }, { "epoch": 2.93123123650345, "grad_norm": 0.050931917173152236, "learning_rate": 7.70434610675837e-05, "loss": 0.8525, "step": 166960 }, { "epoch": 2.931406801383451, "grad_norm": 0.043980088508648145, "learning_rate": 7.703264877627137e-05, "loss": 0.8502, "step": 166970 }, { "epoch": 2.9315823662634526, "grad_norm": 0.059125002572070026, "learning_rate": 7.702183678862999e-05, "loss": 0.843, "step": 166980 }, { "epoch": 2.931757931143454, "grad_norm": 0.0663576935900154, "learning_rate": 7.70110251047965e-05, "loss": 0.8437, "step": 166990 }, { "epoch": 2.9319334960234555, "grad_norm": 0.04882193263292663, "learning_rate": 7.700021372490793e-05, "loss": 0.8404, "step": 167000 }, { "epoch": 2.932109060903457, "grad_norm": 0.06144883620233042, "learning_rate": 7.698940264910127e-05, "loss": 0.8432, "step": 167010 }, { "epoch": 2.932284625783458, "grad_norm": 0.06782261950577774, "learning_rate": 7.697859187751345e-05, "loss": 0.835, "step": 167020 }, { "epoch": 2.9324601906634595, "grad_norm": 0.05457665092446601, "learning_rate": 7.696778141028146e-05, "loss": 0.8409, "step": 167030 }, { "epoch": 2.932635755543461, "grad_norm": 0.05260217311915117, "learning_rate": 7.695697124754233e-05, "loss": 0.8428, "step": 167040 }, { "epoch": 2.9328113204234625, "grad_norm": 0.061349883719698926, "learning_rate": 7.694616138943296e-05, "loss": 0.8366, "step": 167050 }, { "epoch": 2.932986885303464, "grad_norm": 0.05346974844369348, "learning_rate": 7.693535183609033e-05, "loss": 0.8386, "step": 167060 }, { "epoch": 2.9331624501834654, "grad_norm": 0.053809009652835575, "learning_rate": 7.692454258765141e-05, "loss": 0.8431, "step": 167070 }, { "epoch": 2.933338015063467, "grad_norm": 0.06176942948813524, "learning_rate": 7.691373364425318e-05, "loss": 0.8385, "step": 167080 }, { "epoch": 2.933513579943468, "grad_norm": 0.05946631390305561, "learning_rate": 7.690292500603252e-05, "loss": 0.8397, "step": 167090 }, { "epoch": 2.9336891448234694, "grad_norm": 0.07324454811440743, "learning_rate": 7.689211667312644e-05, "loss": 0.8412, "step": 167100 }, { "epoch": 2.933864709703471, "grad_norm": 0.05704678130269038, "learning_rate": 7.688130864567186e-05, "loss": 0.847, "step": 167110 }, { "epoch": 2.9340402745834724, "grad_norm": 0.0716168039490432, "learning_rate": 7.687050092380573e-05, "loss": 0.8446, "step": 167120 }, { "epoch": 2.934215839463474, "grad_norm": 0.05770671084572743, "learning_rate": 7.685969350766491e-05, "loss": 0.8326, "step": 167130 }, { "epoch": 2.934391404343475, "grad_norm": 0.045246851124167986, "learning_rate": 7.684888639738647e-05, "loss": 0.8454, "step": 167140 }, { "epoch": 2.9345669692234764, "grad_norm": 0.04458263834904529, "learning_rate": 7.683807959310726e-05, "loss": 0.841, "step": 167150 }, { "epoch": 2.934742534103478, "grad_norm": 0.053847472729427415, "learning_rate": 7.68272730949642e-05, "loss": 0.8404, "step": 167160 }, { "epoch": 2.9349180989834793, "grad_norm": 0.05483141185636448, "learning_rate": 7.681646690309424e-05, "loss": 0.8413, "step": 167170 }, { "epoch": 2.935093663863481, "grad_norm": 0.05075179863469563, "learning_rate": 7.680566101763427e-05, "loss": 0.8416, "step": 167180 }, { "epoch": 2.9352692287434823, "grad_norm": 0.048571346454187626, "learning_rate": 7.67948554387212e-05, "loss": 0.8357, "step": 167190 }, { "epoch": 2.9354447936234838, "grad_norm": 0.0510030599497992, "learning_rate": 7.678405016649193e-05, "loss": 0.8343, "step": 167200 }, { "epoch": 2.935620358503485, "grad_norm": 0.052926795884638424, "learning_rate": 7.677324520108343e-05, "loss": 0.8405, "step": 167210 }, { "epoch": 2.9357959233834863, "grad_norm": 0.07167274464752811, "learning_rate": 7.676244054263255e-05, "loss": 0.8491, "step": 167220 }, { "epoch": 2.9359714882634877, "grad_norm": 0.051856992041895415, "learning_rate": 7.675163619127617e-05, "loss": 0.8449, "step": 167230 }, { "epoch": 2.9361470531434892, "grad_norm": 0.05258186355520015, "learning_rate": 7.674083214715122e-05, "loss": 0.8443, "step": 167240 }, { "epoch": 2.9363226180234907, "grad_norm": 0.051522462079176316, "learning_rate": 7.673002841039458e-05, "loss": 0.8474, "step": 167250 }, { "epoch": 2.9364981829034917, "grad_norm": 0.04765901651546467, "learning_rate": 7.671922498114313e-05, "loss": 0.8371, "step": 167260 }, { "epoch": 2.9366737477834937, "grad_norm": 0.05453116348929225, "learning_rate": 7.670842185953374e-05, "loss": 0.8396, "step": 167270 }, { "epoch": 2.9368493126634947, "grad_norm": 0.07337771944900864, "learning_rate": 7.669761904570333e-05, "loss": 0.8369, "step": 167280 }, { "epoch": 2.937024877543496, "grad_norm": 0.05465935556667515, "learning_rate": 7.668681653978874e-05, "loss": 0.8471, "step": 167290 }, { "epoch": 2.9372004424234976, "grad_norm": 0.0416806972932303, "learning_rate": 7.667601434192678e-05, "loss": 0.8448, "step": 167300 }, { "epoch": 2.937376007303499, "grad_norm": 0.04974689739762725, "learning_rate": 7.666521245225444e-05, "loss": 0.8406, "step": 167310 }, { "epoch": 2.9375515721835006, "grad_norm": 0.057115272496869124, "learning_rate": 7.665441087090852e-05, "loss": 0.8459, "step": 167320 }, { "epoch": 2.9377271370635016, "grad_norm": 0.05542625134889735, "learning_rate": 7.664360959802586e-05, "loss": 0.8469, "step": 167330 }, { "epoch": 2.937902701943503, "grad_norm": 0.05470042737506913, "learning_rate": 7.663280863374335e-05, "loss": 0.8362, "step": 167340 }, { "epoch": 2.9380782668235046, "grad_norm": 0.04903613653776323, "learning_rate": 7.662200797819781e-05, "loss": 0.8424, "step": 167350 }, { "epoch": 2.938253831703506, "grad_norm": 0.06690396571934669, "learning_rate": 7.661120763152614e-05, "loss": 0.8381, "step": 167360 }, { "epoch": 2.9384293965835075, "grad_norm": 0.042971797690049156, "learning_rate": 7.660040759386508e-05, "loss": 0.8372, "step": 167370 }, { "epoch": 2.9386049614635086, "grad_norm": 0.06032063780906177, "learning_rate": 7.658960786535157e-05, "loss": 0.8432, "step": 167380 }, { "epoch": 2.9387805263435105, "grad_norm": 0.06048670555213265, "learning_rate": 7.657880844612241e-05, "loss": 0.8412, "step": 167390 }, { "epoch": 2.9389560912235115, "grad_norm": 0.04817859239173368, "learning_rate": 7.65680093363144e-05, "loss": 0.8467, "step": 167400 }, { "epoch": 2.939131656103513, "grad_norm": 0.05575193527077909, "learning_rate": 7.655721053606442e-05, "loss": 0.8444, "step": 167410 }, { "epoch": 2.9393072209835145, "grad_norm": 0.05483906984470944, "learning_rate": 7.654641204550928e-05, "loss": 0.8404, "step": 167420 }, { "epoch": 2.939482785863516, "grad_norm": 0.045493182766710674, "learning_rate": 7.653561386478574e-05, "loss": 0.8449, "step": 167430 }, { "epoch": 2.9396583507435174, "grad_norm": 0.05603244855533851, "learning_rate": 7.65248159940307e-05, "loss": 0.8474, "step": 167440 }, { "epoch": 2.9398339156235185, "grad_norm": 0.05853603231741939, "learning_rate": 7.651401843338093e-05, "loss": 0.8342, "step": 167450 }, { "epoch": 2.94000948050352, "grad_norm": 0.05566164231456087, "learning_rate": 7.650322118297323e-05, "loss": 0.8402, "step": 167460 }, { "epoch": 2.9401850453835214, "grad_norm": 0.051713883259529914, "learning_rate": 7.649242424294439e-05, "loss": 0.8395, "step": 167470 }, { "epoch": 2.940360610263523, "grad_norm": 0.047275120689416236, "learning_rate": 7.648162761343125e-05, "loss": 0.845, "step": 167480 }, { "epoch": 2.9405361751435244, "grad_norm": 0.08252658073536305, "learning_rate": 7.64708312945706e-05, "loss": 0.8464, "step": 167490 }, { "epoch": 2.9407117400235254, "grad_norm": 0.04987927584632699, "learning_rate": 7.646003528649918e-05, "loss": 0.838, "step": 167500 }, { "epoch": 2.9408873049035273, "grad_norm": 0.054047057749771595, "learning_rate": 7.644923958935388e-05, "loss": 0.8257, "step": 167510 }, { "epoch": 2.9410628697835284, "grad_norm": 0.059793250962775664, "learning_rate": 7.643844420327138e-05, "loss": 0.8393, "step": 167520 }, { "epoch": 2.94123843466353, "grad_norm": 0.062234001157796606, "learning_rate": 7.642764912838851e-05, "loss": 0.8448, "step": 167530 }, { "epoch": 2.9414139995435313, "grad_norm": 0.04659344276845347, "learning_rate": 7.641685436484198e-05, "loss": 0.842, "step": 167540 }, { "epoch": 2.941589564423533, "grad_norm": 0.05611422703837808, "learning_rate": 7.640605991276869e-05, "loss": 0.8406, "step": 167550 }, { "epoch": 2.9417651293035343, "grad_norm": 0.04028619650795887, "learning_rate": 7.639526577230532e-05, "loss": 0.8457, "step": 167560 }, { "epoch": 2.9419406941835353, "grad_norm": 0.05894137818495544, "learning_rate": 7.638447194358861e-05, "loss": 0.8435, "step": 167570 }, { "epoch": 2.942116259063537, "grad_norm": 0.07160573090231345, "learning_rate": 7.637367842675539e-05, "loss": 0.8359, "step": 167580 }, { "epoch": 2.9422918239435383, "grad_norm": 0.053497757981359736, "learning_rate": 7.63628852219424e-05, "loss": 0.8494, "step": 167590 }, { "epoch": 2.9424673888235398, "grad_norm": 0.06012371987072552, "learning_rate": 7.635209232928634e-05, "loss": 0.8436, "step": 167600 }, { "epoch": 2.9426429537035412, "grad_norm": 0.07031234370640112, "learning_rate": 7.634129974892401e-05, "loss": 0.8467, "step": 167610 }, { "epoch": 2.9428185185835427, "grad_norm": 0.06270809944207574, "learning_rate": 7.633050748099215e-05, "loss": 0.8533, "step": 167620 }, { "epoch": 2.942994083463544, "grad_norm": 0.04683057850854764, "learning_rate": 7.631971552562748e-05, "loss": 0.8372, "step": 167630 }, { "epoch": 2.943169648343545, "grad_norm": 0.046914694623841936, "learning_rate": 7.630892388296671e-05, "loss": 0.8452, "step": 167640 }, { "epoch": 2.9433452132235467, "grad_norm": 0.05148026107676541, "learning_rate": 7.629813255314663e-05, "loss": 0.8417, "step": 167650 }, { "epoch": 2.943520778103548, "grad_norm": 0.05738833491848509, "learning_rate": 7.628734153630395e-05, "loss": 0.84, "step": 167660 }, { "epoch": 2.9436963429835497, "grad_norm": 0.045503100913919174, "learning_rate": 7.627655083257539e-05, "loss": 0.8423, "step": 167670 }, { "epoch": 2.943871907863551, "grad_norm": 0.05465802373924811, "learning_rate": 7.626576044209768e-05, "loss": 0.8419, "step": 167680 }, { "epoch": 2.944047472743552, "grad_norm": 0.052361837595991584, "learning_rate": 7.625497036500752e-05, "loss": 0.8377, "step": 167690 }, { "epoch": 2.9442230376235536, "grad_norm": 0.060158176810609434, "learning_rate": 7.624418060144162e-05, "loss": 0.8445, "step": 167700 }, { "epoch": 2.944398602503555, "grad_norm": 0.057607989439654075, "learning_rate": 7.623339115153666e-05, "loss": 0.8406, "step": 167710 }, { "epoch": 2.9445741673835566, "grad_norm": 0.05505641573742459, "learning_rate": 7.622260201542944e-05, "loss": 0.8391, "step": 167720 }, { "epoch": 2.944749732263558, "grad_norm": 0.048266621179283695, "learning_rate": 7.621181319325658e-05, "loss": 0.8453, "step": 167730 }, { "epoch": 2.9449252971435596, "grad_norm": 0.06087906420928808, "learning_rate": 7.620102468515477e-05, "loss": 0.8543, "step": 167740 }, { "epoch": 2.945100862023561, "grad_norm": 0.0569085967411823, "learning_rate": 7.619023649126078e-05, "loss": 0.8486, "step": 167750 }, { "epoch": 2.945276426903562, "grad_norm": 0.11252056267459147, "learning_rate": 7.617944861171123e-05, "loss": 0.8449, "step": 167760 }, { "epoch": 2.9454519917835635, "grad_norm": 0.05788285861172505, "learning_rate": 7.61686610466428e-05, "loss": 0.8369, "step": 167770 }, { "epoch": 2.945627556663565, "grad_norm": 0.04373757203857897, "learning_rate": 7.615787379619223e-05, "loss": 0.8356, "step": 167780 }, { "epoch": 2.9458031215435665, "grad_norm": 0.05427874840646249, "learning_rate": 7.614708686049615e-05, "loss": 0.8455, "step": 167790 }, { "epoch": 2.945978686423568, "grad_norm": 0.06652827925563219, "learning_rate": 7.613630023969124e-05, "loss": 0.841, "step": 167800 }, { "epoch": 2.946154251303569, "grad_norm": 0.057269712352646336, "learning_rate": 7.612551393391412e-05, "loss": 0.8444, "step": 167810 }, { "epoch": 2.9463298161835705, "grad_norm": 0.04681576222835532, "learning_rate": 7.611472794330157e-05, "loss": 0.8429, "step": 167820 }, { "epoch": 2.946505381063572, "grad_norm": 0.06720197087811926, "learning_rate": 7.610394226799017e-05, "loss": 0.8349, "step": 167830 }, { "epoch": 2.9466809459435734, "grad_norm": 0.06340060315270866, "learning_rate": 7.609315690811658e-05, "loss": 0.8469, "step": 167840 }, { "epoch": 2.946856510823575, "grad_norm": 0.056064844517107304, "learning_rate": 7.608237186381748e-05, "loss": 0.8426, "step": 167850 }, { "epoch": 2.9470320757035764, "grad_norm": 0.05897118214110981, "learning_rate": 7.607158713522951e-05, "loss": 0.8376, "step": 167860 }, { "epoch": 2.947207640583578, "grad_norm": 0.05875696078994256, "learning_rate": 7.60608027224893e-05, "loss": 0.8449, "step": 167870 }, { "epoch": 2.947383205463579, "grad_norm": 0.053709030571638394, "learning_rate": 7.605001862573346e-05, "loss": 0.8467, "step": 167880 }, { "epoch": 2.9475587703435804, "grad_norm": 0.05475490886281946, "learning_rate": 7.603923484509871e-05, "loss": 0.8447, "step": 167890 }, { "epoch": 2.947734335223582, "grad_norm": 0.06743129615930185, "learning_rate": 7.602845138072164e-05, "loss": 0.8415, "step": 167900 }, { "epoch": 2.9479099001035833, "grad_norm": 0.05086987174609467, "learning_rate": 7.601766823273886e-05, "loss": 0.8437, "step": 167910 }, { "epoch": 2.948085464983585, "grad_norm": 0.0587244108503237, "learning_rate": 7.600688540128701e-05, "loss": 0.8382, "step": 167920 }, { "epoch": 2.948261029863586, "grad_norm": 0.05286717918118161, "learning_rate": 7.599610288650274e-05, "loss": 0.843, "step": 167930 }, { "epoch": 2.9484365947435878, "grad_norm": 0.05086535414070614, "learning_rate": 7.598532068852261e-05, "loss": 0.8474, "step": 167940 }, { "epoch": 2.948612159623589, "grad_norm": 0.0405764007780522, "learning_rate": 7.597453880748328e-05, "loss": 0.8493, "step": 167950 }, { "epoch": 2.9487877245035903, "grad_norm": 0.04896297165384287, "learning_rate": 7.596375724352134e-05, "loss": 0.8431, "step": 167960 }, { "epoch": 2.9489632893835918, "grad_norm": 0.043128019717402453, "learning_rate": 7.595297599677338e-05, "loss": 0.8475, "step": 167970 }, { "epoch": 2.9491388542635932, "grad_norm": 0.046912508085332004, "learning_rate": 7.594219506737599e-05, "loss": 0.8461, "step": 167980 }, { "epoch": 2.9493144191435947, "grad_norm": 0.04703778561653673, "learning_rate": 7.593141445546583e-05, "loss": 0.8408, "step": 167990 }, { "epoch": 2.9494899840235957, "grad_norm": 0.045342409411449194, "learning_rate": 7.592063416117944e-05, "loss": 0.839, "step": 168000 }, { "epoch": 2.9496655489035972, "grad_norm": 0.05593102131841939, "learning_rate": 7.590985418465341e-05, "loss": 0.8474, "step": 168010 }, { "epoch": 2.9498411137835987, "grad_norm": 0.059301676448780705, "learning_rate": 7.589907452602436e-05, "loss": 0.841, "step": 168020 }, { "epoch": 2.9500166786636, "grad_norm": 0.05194284643239799, "learning_rate": 7.588829518542884e-05, "loss": 0.8465, "step": 168030 }, { "epoch": 2.9501922435436017, "grad_norm": 0.05529019323184446, "learning_rate": 7.587751616300343e-05, "loss": 0.8428, "step": 168040 }, { "epoch": 2.9503678084236027, "grad_norm": 0.04808647361527978, "learning_rate": 7.586673745888468e-05, "loss": 0.8488, "step": 168050 }, { "epoch": 2.9505433733036046, "grad_norm": 0.04975819342008567, "learning_rate": 7.585595907320921e-05, "loss": 0.8402, "step": 168060 }, { "epoch": 2.9507189381836056, "grad_norm": 0.0586336065148446, "learning_rate": 7.584518100611356e-05, "loss": 0.8375, "step": 168070 }, { "epoch": 2.950894503063607, "grad_norm": 0.07539323878946506, "learning_rate": 7.583440325773427e-05, "loss": 0.8394, "step": 168080 }, { "epoch": 2.9510700679436086, "grad_norm": 0.039633913099940576, "learning_rate": 7.582362582820794e-05, "loss": 0.845, "step": 168090 }, { "epoch": 2.95124563282361, "grad_norm": 0.06677743474199084, "learning_rate": 7.581284871767107e-05, "loss": 0.8482, "step": 168100 }, { "epoch": 2.9514211977036116, "grad_norm": 0.0467693023524509, "learning_rate": 7.580207192626023e-05, "loss": 0.8389, "step": 168110 }, { "epoch": 2.9515967625836126, "grad_norm": 0.05723427041565265, "learning_rate": 7.5791295454112e-05, "loss": 0.8421, "step": 168120 }, { "epoch": 2.951772327463614, "grad_norm": 0.0468938007776844, "learning_rate": 7.578051930136287e-05, "loss": 0.8367, "step": 168130 }, { "epoch": 2.9519478923436155, "grad_norm": 0.061955339527713794, "learning_rate": 7.57697434681494e-05, "loss": 0.8411, "step": 168140 }, { "epoch": 2.952123457223617, "grad_norm": 0.050350681248804804, "learning_rate": 7.575896795460807e-05, "loss": 0.8433, "step": 168150 }, { "epoch": 2.9522990221036185, "grad_norm": 0.051714408132565635, "learning_rate": 7.574819276087552e-05, "loss": 0.8385, "step": 168160 }, { "epoch": 2.9524745869836195, "grad_norm": 0.07381364169206747, "learning_rate": 7.573741788708818e-05, "loss": 0.8419, "step": 168170 }, { "epoch": 2.9526501518636215, "grad_norm": 0.05870476025688064, "learning_rate": 7.572664333338259e-05, "loss": 0.8387, "step": 168180 }, { "epoch": 2.9528257167436225, "grad_norm": 0.04114187498376406, "learning_rate": 7.571586909989527e-05, "loss": 0.8391, "step": 168190 }, { "epoch": 2.953001281623624, "grad_norm": 0.03838024391843325, "learning_rate": 7.570509518676276e-05, "loss": 0.8355, "step": 168200 }, { "epoch": 2.9531768465036254, "grad_norm": 0.03969178182245593, "learning_rate": 7.569432159412154e-05, "loss": 0.854, "step": 168210 }, { "epoch": 2.953352411383627, "grad_norm": 0.052199144822701424, "learning_rate": 7.568354832210807e-05, "loss": 0.8408, "step": 168220 }, { "epoch": 2.9535279762636284, "grad_norm": 0.055050452920475314, "learning_rate": 7.567277537085893e-05, "loss": 0.8422, "step": 168230 }, { "epoch": 2.9537035411436294, "grad_norm": 0.06902453854859493, "learning_rate": 7.566200274051059e-05, "loss": 0.8463, "step": 168240 }, { "epoch": 2.953879106023631, "grad_norm": 0.048937878770431036, "learning_rate": 7.56512304311995e-05, "loss": 0.8474, "step": 168250 }, { "epoch": 2.9540546709036324, "grad_norm": 0.12478754849888977, "learning_rate": 7.564045844306219e-05, "loss": 0.8434, "step": 168260 }, { "epoch": 2.954230235783634, "grad_norm": 0.08442486199663052, "learning_rate": 7.562968677623516e-05, "loss": 0.8358, "step": 168270 }, { "epoch": 2.9544058006636353, "grad_norm": 0.04656447768474815, "learning_rate": 7.561891543085481e-05, "loss": 0.8503, "step": 168280 }, { "epoch": 2.954581365543637, "grad_norm": 0.05944589916537408, "learning_rate": 7.560814440705771e-05, "loss": 0.8357, "step": 168290 }, { "epoch": 2.9547569304236383, "grad_norm": 0.05540753404013863, "learning_rate": 7.559737370498027e-05, "loss": 0.8427, "step": 168300 }, { "epoch": 2.9549324953036393, "grad_norm": 0.047349409590548175, "learning_rate": 7.558660332475899e-05, "loss": 0.844, "step": 168310 }, { "epoch": 2.955108060183641, "grad_norm": 0.04957502791727881, "learning_rate": 7.557583326653026e-05, "loss": 0.8459, "step": 168320 }, { "epoch": 2.9552836250636423, "grad_norm": 0.061507967461701105, "learning_rate": 7.556506353043064e-05, "loss": 0.8434, "step": 168330 }, { "epoch": 2.9554591899436438, "grad_norm": 0.06635647581897161, "learning_rate": 7.555429411659654e-05, "loss": 0.8415, "step": 168340 }, { "epoch": 2.9556347548236452, "grad_norm": 0.07663879783747689, "learning_rate": 7.55435250251644e-05, "loss": 0.8404, "step": 168350 }, { "epoch": 2.9558103197036463, "grad_norm": 0.05736600317411771, "learning_rate": 7.553275625627068e-05, "loss": 0.8479, "step": 168360 }, { "epoch": 2.9559858845836477, "grad_norm": 0.04523658092900591, "learning_rate": 7.552198781005182e-05, "loss": 0.8431, "step": 168370 }, { "epoch": 2.9561614494636492, "grad_norm": 0.04637458802874618, "learning_rate": 7.551121968664427e-05, "loss": 0.8346, "step": 168380 }, { "epoch": 2.9563370143436507, "grad_norm": 0.05955157292837442, "learning_rate": 7.550045188618442e-05, "loss": 0.8497, "step": 168390 }, { "epoch": 2.956512579223652, "grad_norm": 0.05181297075228381, "learning_rate": 7.548968440880876e-05, "loss": 0.8418, "step": 168400 }, { "epoch": 2.9566881441036537, "grad_norm": 0.04625287632300723, "learning_rate": 7.547891725465369e-05, "loss": 0.8464, "step": 168410 }, { "epoch": 2.956863708983655, "grad_norm": 0.049351324853537014, "learning_rate": 7.546815042385559e-05, "loss": 0.8407, "step": 168420 }, { "epoch": 2.957039273863656, "grad_norm": 0.06324081034453582, "learning_rate": 7.545738391655097e-05, "loss": 0.8463, "step": 168430 }, { "epoch": 2.9572148387436576, "grad_norm": 0.05456528000535669, "learning_rate": 7.544661773287617e-05, "loss": 0.8366, "step": 168440 }, { "epoch": 2.957390403623659, "grad_norm": 0.0641583877446992, "learning_rate": 7.54358518729676e-05, "loss": 0.8427, "step": 168450 }, { "epoch": 2.9575659685036606, "grad_norm": 0.04729272686481462, "learning_rate": 7.542508633696171e-05, "loss": 0.8449, "step": 168460 }, { "epoch": 2.957741533383662, "grad_norm": 0.05111797299923122, "learning_rate": 7.541432112499487e-05, "loss": 0.839, "step": 168470 }, { "epoch": 2.957917098263663, "grad_norm": 0.08608268299739921, "learning_rate": 7.54035562372035e-05, "loss": 0.8512, "step": 168480 }, { "epoch": 2.9580926631436646, "grad_norm": 0.07918159997770169, "learning_rate": 7.539279167372392e-05, "loss": 0.8449, "step": 168490 }, { "epoch": 2.958268228023666, "grad_norm": 0.054624711597465715, "learning_rate": 7.538202743469264e-05, "loss": 0.849, "step": 168500 }, { "epoch": 2.9584437929036675, "grad_norm": 0.055918372567179214, "learning_rate": 7.537126352024597e-05, "loss": 0.8383, "step": 168510 }, { "epoch": 2.958619357783669, "grad_norm": 0.04658735199541122, "learning_rate": 7.536049993052026e-05, "loss": 0.8464, "step": 168520 }, { "epoch": 2.9587949226636705, "grad_norm": 0.05966164727810955, "learning_rate": 7.534973666565198e-05, "loss": 0.8423, "step": 168530 }, { "epoch": 2.958970487543672, "grad_norm": 0.06328942466932053, "learning_rate": 7.533897372577743e-05, "loss": 0.8487, "step": 168540 }, { "epoch": 2.959146052423673, "grad_norm": 0.05823969853584251, "learning_rate": 7.532821111103303e-05, "loss": 0.8453, "step": 168550 }, { "epoch": 2.9593216173036745, "grad_norm": 0.05292372641394586, "learning_rate": 7.531744882155506e-05, "loss": 0.8413, "step": 168560 }, { "epoch": 2.959497182183676, "grad_norm": 0.05135196983765343, "learning_rate": 7.530668685747998e-05, "loss": 0.8402, "step": 168570 }, { "epoch": 2.9596727470636774, "grad_norm": 0.042856978673269555, "learning_rate": 7.529592521894409e-05, "loss": 0.8503, "step": 168580 }, { "epoch": 2.959848311943679, "grad_norm": 0.04856271775855568, "learning_rate": 7.528516390608374e-05, "loss": 0.8293, "step": 168590 }, { "epoch": 2.96002387682368, "grad_norm": 0.06266236638722485, "learning_rate": 7.527440291903532e-05, "loss": 0.8509, "step": 168600 }, { "epoch": 2.9601994417036814, "grad_norm": 0.05991195757900381, "learning_rate": 7.526364225793515e-05, "loss": 0.8375, "step": 168610 }, { "epoch": 2.960375006583683, "grad_norm": 0.04634599055432888, "learning_rate": 7.525288192291953e-05, "loss": 0.8382, "step": 168620 }, { "epoch": 2.9605505714636844, "grad_norm": 0.04851637815715352, "learning_rate": 7.524212191412485e-05, "loss": 0.8424, "step": 168630 }, { "epoch": 2.960726136343686, "grad_norm": 0.047219451053629964, "learning_rate": 7.523136223168742e-05, "loss": 0.849, "step": 168640 }, { "epoch": 2.9609017012236873, "grad_norm": 0.050356839475001136, "learning_rate": 7.522060287574358e-05, "loss": 0.8437, "step": 168650 }, { "epoch": 2.961077266103689, "grad_norm": 0.07565658247875105, "learning_rate": 7.520984384642959e-05, "loss": 0.8442, "step": 168660 }, { "epoch": 2.96125283098369, "grad_norm": 0.04756622138999191, "learning_rate": 7.519908514388186e-05, "loss": 0.837, "step": 168670 }, { "epoch": 2.9614283958636913, "grad_norm": 0.05217294627547827, "learning_rate": 7.518832676823668e-05, "loss": 0.842, "step": 168680 }, { "epoch": 2.961603960743693, "grad_norm": 0.045228606942661724, "learning_rate": 7.517756871963032e-05, "loss": 0.8459, "step": 168690 }, { "epoch": 2.9617795256236943, "grad_norm": 0.05103652630472418, "learning_rate": 7.516681099819913e-05, "loss": 0.8465, "step": 168700 }, { "epoch": 2.9619550905036958, "grad_norm": 0.07996916682746567, "learning_rate": 7.515605360407941e-05, "loss": 0.8456, "step": 168710 }, { "epoch": 2.962130655383697, "grad_norm": 0.05569438579159169, "learning_rate": 7.514529653740742e-05, "loss": 0.8418, "step": 168720 }, { "epoch": 2.9623062202636987, "grad_norm": 0.05478965968494344, "learning_rate": 7.513453979831946e-05, "loss": 0.8372, "step": 168730 }, { "epoch": 2.9624817851436998, "grad_norm": 0.060611035871923256, "learning_rate": 7.512378338695186e-05, "loss": 0.837, "step": 168740 }, { "epoch": 2.9626573500237012, "grad_norm": 0.04846726381402895, "learning_rate": 7.511302730344089e-05, "loss": 0.8389, "step": 168750 }, { "epoch": 2.9628329149037027, "grad_norm": 0.05717131412173837, "learning_rate": 7.51022715479228e-05, "loss": 0.8447, "step": 168760 }, { "epoch": 2.963008479783704, "grad_norm": 0.04935139912725604, "learning_rate": 7.509151612053394e-05, "loss": 0.8356, "step": 168770 }, { "epoch": 2.9631840446637057, "grad_norm": 0.05130019650026619, "learning_rate": 7.508076102141051e-05, "loss": 0.8406, "step": 168780 }, { "epoch": 2.9633596095437067, "grad_norm": 0.05950380622806272, "learning_rate": 7.50700062506888e-05, "loss": 0.8357, "step": 168790 }, { "epoch": 2.963535174423708, "grad_norm": 0.06489190557976145, "learning_rate": 7.505925180850508e-05, "loss": 0.8422, "step": 168800 }, { "epoch": 2.9637107393037097, "grad_norm": 0.04530482153467937, "learning_rate": 7.504849769499563e-05, "loss": 0.8442, "step": 168810 }, { "epoch": 2.963886304183711, "grad_norm": 0.06311054040310143, "learning_rate": 7.503774391029668e-05, "loss": 0.8523, "step": 168820 }, { "epoch": 2.9640618690637126, "grad_norm": 0.05262967847609385, "learning_rate": 7.502699045454445e-05, "loss": 0.8428, "step": 168830 }, { "epoch": 2.9642374339437136, "grad_norm": 0.06038320431844586, "learning_rate": 7.501623732787526e-05, "loss": 0.8476, "step": 168840 }, { "epoch": 2.9644129988237156, "grad_norm": 0.05214931612395326, "learning_rate": 7.500548453042534e-05, "loss": 0.8502, "step": 168850 }, { "epoch": 2.9645885637037166, "grad_norm": 0.054596038313717364, "learning_rate": 7.499473206233087e-05, "loss": 0.8412, "step": 168860 }, { "epoch": 2.964764128583718, "grad_norm": 0.06420486436050703, "learning_rate": 7.498397992372818e-05, "loss": 0.8473, "step": 168870 }, { "epoch": 2.9649396934637195, "grad_norm": 0.05773873653324499, "learning_rate": 7.497322811475342e-05, "loss": 0.8466, "step": 168880 }, { "epoch": 2.965115258343721, "grad_norm": 0.05452381864483177, "learning_rate": 7.496247663554287e-05, "loss": 0.8399, "step": 168890 }, { "epoch": 2.9652908232237225, "grad_norm": 0.04992187153787078, "learning_rate": 7.495172548623269e-05, "loss": 0.8481, "step": 168900 }, { "epoch": 2.9654663881037235, "grad_norm": 0.05621121064332261, "learning_rate": 7.494097466695917e-05, "loss": 0.8398, "step": 168910 }, { "epoch": 2.965641952983725, "grad_norm": 0.05870437997399555, "learning_rate": 7.493022417785852e-05, "loss": 0.8419, "step": 168920 }, { "epoch": 2.9658175178637265, "grad_norm": 0.06904724764877426, "learning_rate": 7.491947401906687e-05, "loss": 0.8434, "step": 168930 }, { "epoch": 2.965993082743728, "grad_norm": 0.05497003361286376, "learning_rate": 7.49087241907205e-05, "loss": 0.8437, "step": 168940 }, { "epoch": 2.9661686476237294, "grad_norm": 0.06275294156145579, "learning_rate": 7.489797469295562e-05, "loss": 0.85, "step": 168950 }, { "epoch": 2.9663442125037305, "grad_norm": 0.04512596590802216, "learning_rate": 7.488722552590838e-05, "loss": 0.8341, "step": 168960 }, { "epoch": 2.9665197773837324, "grad_norm": 0.050568142447064014, "learning_rate": 7.487647668971501e-05, "loss": 0.8384, "step": 168970 }, { "epoch": 2.9666953422637334, "grad_norm": 0.05109962284296371, "learning_rate": 7.48657281845117e-05, "loss": 0.8411, "step": 168980 }, { "epoch": 2.966870907143735, "grad_norm": 0.055220741780403565, "learning_rate": 7.48549800104346e-05, "loss": 0.8529, "step": 168990 }, { "epoch": 2.9670464720237364, "grad_norm": 0.04985091822027374, "learning_rate": 7.484423216761989e-05, "loss": 0.8475, "step": 169000 }, { "epoch": 2.967222036903738, "grad_norm": 0.05272193794025351, "learning_rate": 7.48334846562038e-05, "loss": 0.8401, "step": 169010 }, { "epoch": 2.9673976017837393, "grad_norm": 0.043682503153636976, "learning_rate": 7.482273747632249e-05, "loss": 0.8405, "step": 169020 }, { "epoch": 2.9675731666637404, "grad_norm": 0.05523527768549374, "learning_rate": 7.481199062811208e-05, "loss": 0.8434, "step": 169030 }, { "epoch": 2.967748731543742, "grad_norm": 0.07783328990742736, "learning_rate": 7.480124411170878e-05, "loss": 0.8445, "step": 169040 }, { "epoch": 2.9679242964237433, "grad_norm": 0.059717492526369105, "learning_rate": 7.479049792724877e-05, "loss": 0.8485, "step": 169050 }, { "epoch": 2.968099861303745, "grad_norm": 0.05392892633772013, "learning_rate": 7.477975207486815e-05, "loss": 0.8387, "step": 169060 }, { "epoch": 2.9682754261837463, "grad_norm": 0.05391961839958929, "learning_rate": 7.476900655470304e-05, "loss": 0.8421, "step": 169070 }, { "epoch": 2.9684509910637478, "grad_norm": 0.05324704260745852, "learning_rate": 7.475826136688971e-05, "loss": 0.8455, "step": 169080 }, { "epoch": 2.9686265559437492, "grad_norm": 0.05609446963325138, "learning_rate": 7.474751651156424e-05, "loss": 0.8452, "step": 169090 }, { "epoch": 2.9688021208237503, "grad_norm": 0.05486450178186058, "learning_rate": 7.473677198886273e-05, "loss": 0.8409, "step": 169100 }, { "epoch": 2.9689776857037518, "grad_norm": 0.04831977893805946, "learning_rate": 7.472602779892138e-05, "loss": 0.8425, "step": 169110 }, { "epoch": 2.9691532505837532, "grad_norm": 0.047889772859908225, "learning_rate": 7.47152839418763e-05, "loss": 0.8401, "step": 169120 }, { "epoch": 2.9693288154637547, "grad_norm": 0.04965646926620969, "learning_rate": 7.47045404178636e-05, "loss": 0.8436, "step": 169130 }, { "epoch": 2.969504380343756, "grad_norm": 0.089480715414352, "learning_rate": 7.46937972270194e-05, "loss": 0.8427, "step": 169140 }, { "epoch": 2.9696799452237572, "grad_norm": 0.04326977789929106, "learning_rate": 7.468305436947986e-05, "loss": 0.8429, "step": 169150 }, { "epoch": 2.9698555101037587, "grad_norm": 0.05907912177242429, "learning_rate": 7.467231184538104e-05, "loss": 0.8478, "step": 169160 }, { "epoch": 2.97003107498376, "grad_norm": 0.06365626603564883, "learning_rate": 7.466156965485906e-05, "loss": 0.8503, "step": 169170 }, { "epoch": 2.9702066398637617, "grad_norm": 0.052724842427474876, "learning_rate": 7.465082779805005e-05, "loss": 0.8393, "step": 169180 }, { "epoch": 2.970382204743763, "grad_norm": 0.05808607598216973, "learning_rate": 7.464008627509012e-05, "loss": 0.8431, "step": 169190 }, { "epoch": 2.9705577696237646, "grad_norm": 0.05295360506180756, "learning_rate": 7.462934508611533e-05, "loss": 0.8516, "step": 169200 }, { "epoch": 2.970733334503766, "grad_norm": 0.055603412511300083, "learning_rate": 7.46186042312618e-05, "loss": 0.8418, "step": 169210 }, { "epoch": 2.970908899383767, "grad_norm": 0.049772046053090185, "learning_rate": 7.460786371066562e-05, "loss": 0.8443, "step": 169220 }, { "epoch": 2.9710844642637686, "grad_norm": 0.05818245298839031, "learning_rate": 7.459712352446288e-05, "loss": 0.8369, "step": 169230 }, { "epoch": 2.97126002914377, "grad_norm": 0.07157322371041915, "learning_rate": 7.458638367278958e-05, "loss": 0.8455, "step": 169240 }, { "epoch": 2.9714355940237716, "grad_norm": 0.05330694296559712, "learning_rate": 7.457564415578191e-05, "loss": 0.8448, "step": 169250 }, { "epoch": 2.971611158903773, "grad_norm": 0.05881120848448991, "learning_rate": 7.45649049735759e-05, "loss": 0.8459, "step": 169260 }, { "epoch": 2.971786723783774, "grad_norm": 0.05096062241057413, "learning_rate": 7.455416612630758e-05, "loss": 0.8429, "step": 169270 }, { "epoch": 2.9719622886637755, "grad_norm": 0.04278002872093576, "learning_rate": 7.454342761411307e-05, "loss": 0.8432, "step": 169280 }, { "epoch": 2.972137853543777, "grad_norm": 0.05835168944900167, "learning_rate": 7.45326894371284e-05, "loss": 0.8395, "step": 169290 }, { "epoch": 2.9723134184237785, "grad_norm": 0.056750955453901505, "learning_rate": 7.452195159548961e-05, "loss": 0.8345, "step": 169300 }, { "epoch": 2.97248898330378, "grad_norm": 0.050582731865255665, "learning_rate": 7.451121408933276e-05, "loss": 0.8463, "step": 169310 }, { "epoch": 2.9726645481837815, "grad_norm": 0.04832329732001909, "learning_rate": 7.450047691879395e-05, "loss": 0.8476, "step": 169320 }, { "epoch": 2.972840113063783, "grad_norm": 0.055811087676733415, "learning_rate": 7.448974008400914e-05, "loss": 0.842, "step": 169330 }, { "epoch": 2.973015677943784, "grad_norm": 0.05164443900316336, "learning_rate": 7.447900358511439e-05, "loss": 0.8453, "step": 169340 }, { "epoch": 2.9731912428237854, "grad_norm": 0.04102340773307804, "learning_rate": 7.446826742224576e-05, "loss": 0.845, "step": 169350 }, { "epoch": 2.973366807703787, "grad_norm": 0.05117725897231588, "learning_rate": 7.445753159553928e-05, "loss": 0.8488, "step": 169360 }, { "epoch": 2.9735423725837884, "grad_norm": 0.06097823058859093, "learning_rate": 7.444679610513095e-05, "loss": 0.8576, "step": 169370 }, { "epoch": 2.97371793746379, "grad_norm": 0.05113923283994533, "learning_rate": 7.443606095115682e-05, "loss": 0.8395, "step": 169380 }, { "epoch": 2.973893502343791, "grad_norm": 0.06161564574439606, "learning_rate": 7.442532613375288e-05, "loss": 0.8429, "step": 169390 }, { "epoch": 2.974069067223793, "grad_norm": 0.0614683602751196, "learning_rate": 7.441459165305517e-05, "loss": 0.8387, "step": 169400 }, { "epoch": 2.974244632103794, "grad_norm": 0.052447958837884986, "learning_rate": 7.440385750919962e-05, "loss": 0.8452, "step": 169410 }, { "epoch": 2.9744201969837953, "grad_norm": 0.05780621098119954, "learning_rate": 7.439312370232233e-05, "loss": 0.8399, "step": 169420 }, { "epoch": 2.974595761863797, "grad_norm": 0.046879501814102, "learning_rate": 7.438239023255926e-05, "loss": 0.8479, "step": 169430 }, { "epoch": 2.9747713267437983, "grad_norm": 0.055364523210159895, "learning_rate": 7.43716571000464e-05, "loss": 0.8395, "step": 169440 }, { "epoch": 2.9749468916237998, "grad_norm": 0.09025764903432552, "learning_rate": 7.436092430491977e-05, "loss": 0.8448, "step": 169450 }, { "epoch": 2.975122456503801, "grad_norm": 0.056067628483124, "learning_rate": 7.435019184731531e-05, "loss": 0.8365, "step": 169460 }, { "epoch": 2.9752980213838023, "grad_norm": 0.06836727256194225, "learning_rate": 7.433945972736906e-05, "loss": 0.8484, "step": 169470 }, { "epoch": 2.9754735862638038, "grad_norm": 0.06681496704714468, "learning_rate": 7.432872794521692e-05, "loss": 0.8484, "step": 169480 }, { "epoch": 2.9756491511438052, "grad_norm": 0.058990406033963945, "learning_rate": 7.431799650099495e-05, "loss": 0.8358, "step": 169490 }, { "epoch": 2.9758247160238067, "grad_norm": 0.06840373248176704, "learning_rate": 7.430726539483906e-05, "loss": 0.8418, "step": 169500 }, { "epoch": 2.9760002809038077, "grad_norm": 0.05814389156153244, "learning_rate": 7.42965346268852e-05, "loss": 0.8365, "step": 169510 }, { "epoch": 2.9761758457838097, "grad_norm": 0.057937669123300735, "learning_rate": 7.42858041972694e-05, "loss": 0.8315, "step": 169520 }, { "epoch": 2.9763514106638107, "grad_norm": 0.04813126130427398, "learning_rate": 7.427507410612758e-05, "loss": 0.8473, "step": 169530 }, { "epoch": 2.976526975543812, "grad_norm": 0.09863817040373764, "learning_rate": 7.426434435359568e-05, "loss": 0.8372, "step": 169540 }, { "epoch": 2.9767025404238137, "grad_norm": 0.048802869850876646, "learning_rate": 7.425361493980968e-05, "loss": 0.8396, "step": 169550 }, { "epoch": 2.976878105303815, "grad_norm": 0.045639126445160376, "learning_rate": 7.42428858649055e-05, "loss": 0.8431, "step": 169560 }, { "epoch": 2.9770536701838166, "grad_norm": 0.06370854262053151, "learning_rate": 7.423215712901908e-05, "loss": 0.8387, "step": 169570 }, { "epoch": 2.9772292350638176, "grad_norm": 0.07563635939820122, "learning_rate": 7.422142873228632e-05, "loss": 0.8356, "step": 169580 }, { "epoch": 2.977404799943819, "grad_norm": 0.05401844260063269, "learning_rate": 7.421070067484324e-05, "loss": 0.8482, "step": 169590 }, { "epoch": 2.9775803648238206, "grad_norm": 0.06383308410711917, "learning_rate": 7.41999729568257e-05, "loss": 0.8452, "step": 169600 }, { "epoch": 2.977755929703822, "grad_norm": 0.05044443075931382, "learning_rate": 7.418924557836964e-05, "loss": 0.8486, "step": 169610 }, { "epoch": 2.9779314945838236, "grad_norm": 0.060889216345067484, "learning_rate": 7.417851853961098e-05, "loss": 0.8381, "step": 169620 }, { "epoch": 2.9781070594638246, "grad_norm": 0.053852193135811924, "learning_rate": 7.416779184068564e-05, "loss": 0.8405, "step": 169630 }, { "epoch": 2.9782826243438265, "grad_norm": 0.04659075819997242, "learning_rate": 7.415706548172952e-05, "loss": 0.8377, "step": 169640 }, { "epoch": 2.9784581892238275, "grad_norm": 0.06248010595068482, "learning_rate": 7.41463394628785e-05, "loss": 0.8468, "step": 169650 }, { "epoch": 2.978633754103829, "grad_norm": 0.08084670600664634, "learning_rate": 7.413561378426854e-05, "loss": 0.8377, "step": 169660 }, { "epoch": 2.9788093189838305, "grad_norm": 0.06581696598200197, "learning_rate": 7.412488844603549e-05, "loss": 0.8436, "step": 169670 }, { "epoch": 2.978984883863832, "grad_norm": 0.061200699610134926, "learning_rate": 7.41141634483152e-05, "loss": 0.8436, "step": 169680 }, { "epoch": 2.9791604487438335, "grad_norm": 0.052678852624720905, "learning_rate": 7.410343879124366e-05, "loss": 0.8465, "step": 169690 }, { "epoch": 2.9793360136238345, "grad_norm": 0.0613733434999498, "learning_rate": 7.409271447495672e-05, "loss": 0.8432, "step": 169700 }, { "epoch": 2.979511578503836, "grad_norm": 0.05587562946276938, "learning_rate": 7.408199049959021e-05, "loss": 0.8396, "step": 169710 }, { "epoch": 2.9796871433838374, "grad_norm": 0.042005635558149956, "learning_rate": 7.407126686528007e-05, "loss": 0.8443, "step": 169720 }, { "epoch": 2.979862708263839, "grad_norm": 0.05966108414395797, "learning_rate": 7.406054357216213e-05, "loss": 0.8493, "step": 169730 }, { "epoch": 2.9800382731438404, "grad_norm": 0.0484983172245412, "learning_rate": 7.404982062037228e-05, "loss": 0.8436, "step": 169740 }, { "epoch": 2.980213838023842, "grad_norm": 0.04108956855799947, "learning_rate": 7.40390980100463e-05, "loss": 0.8441, "step": 169750 }, { "epoch": 2.9803894029038434, "grad_norm": 0.052817894521178195, "learning_rate": 7.402837574132018e-05, "loss": 0.8406, "step": 169760 }, { "epoch": 2.9805649677838444, "grad_norm": 0.060582582450352813, "learning_rate": 7.40176538143297e-05, "loss": 0.8474, "step": 169770 }, { "epoch": 2.980740532663846, "grad_norm": 0.07098972014511638, "learning_rate": 7.40069322292107e-05, "loss": 0.8381, "step": 169780 }, { "epoch": 2.9809160975438473, "grad_norm": 0.0685583609482427, "learning_rate": 7.399621098609906e-05, "loss": 0.8463, "step": 169790 }, { "epoch": 2.981091662423849, "grad_norm": 0.053552629116923496, "learning_rate": 7.39854900851306e-05, "loss": 0.8407, "step": 169800 }, { "epoch": 2.9812672273038503, "grad_norm": 0.052610871033402944, "learning_rate": 7.397476952644117e-05, "loss": 0.8433, "step": 169810 }, { "epoch": 2.9814427921838513, "grad_norm": 0.04756424901291243, "learning_rate": 7.396404931016654e-05, "loss": 0.8418, "step": 169820 }, { "epoch": 2.981618357063853, "grad_norm": 0.0664439025924535, "learning_rate": 7.395332943644263e-05, "loss": 0.8461, "step": 169830 }, { "epoch": 2.9817939219438543, "grad_norm": 0.07239490765627432, "learning_rate": 7.394260990540522e-05, "loss": 0.8496, "step": 169840 }, { "epoch": 2.9819694868238558, "grad_norm": 0.04631434563909119, "learning_rate": 7.393189071719009e-05, "loss": 0.8429, "step": 169850 }, { "epoch": 2.9821450517038572, "grad_norm": 0.05759506034646767, "learning_rate": 7.392117187193312e-05, "loss": 0.842, "step": 169860 }, { "epoch": 2.9823206165838587, "grad_norm": 0.05859004674687411, "learning_rate": 7.39104533697701e-05, "loss": 0.8482, "step": 169870 }, { "epoch": 2.98249618146386, "grad_norm": 0.049015526539613725, "learning_rate": 7.389973521083681e-05, "loss": 0.8431, "step": 169880 }, { "epoch": 2.9826717463438612, "grad_norm": 0.04372805760047643, "learning_rate": 7.388901739526907e-05, "loss": 0.8449, "step": 169890 }, { "epoch": 2.9828473112238627, "grad_norm": 0.04904656002226597, "learning_rate": 7.38782999232027e-05, "loss": 0.8469, "step": 169900 }, { "epoch": 2.983022876103864, "grad_norm": 0.05335345411860456, "learning_rate": 7.386758279477345e-05, "loss": 0.843, "step": 169910 }, { "epoch": 2.9831984409838657, "grad_norm": 0.05368055592607822, "learning_rate": 7.385686601011708e-05, "loss": 0.8332, "step": 169920 }, { "epoch": 2.983374005863867, "grad_norm": 0.05503623204134334, "learning_rate": 7.384614956936949e-05, "loss": 0.8384, "step": 169930 }, { "epoch": 2.983549570743868, "grad_norm": 0.04250525112415654, "learning_rate": 7.383543347266637e-05, "loss": 0.8413, "step": 169940 }, { "epoch": 2.9837251356238697, "grad_norm": 0.05163159906736531, "learning_rate": 7.382471772014349e-05, "loss": 0.85, "step": 169950 }, { "epoch": 2.983900700503871, "grad_norm": 0.056972638197625124, "learning_rate": 7.381400231193667e-05, "loss": 0.8341, "step": 169960 }, { "epoch": 2.9840762653838726, "grad_norm": 0.058968572616418406, "learning_rate": 7.380328724818166e-05, "loss": 0.8344, "step": 169970 }, { "epoch": 2.984251830263874, "grad_norm": 0.05367295658355187, "learning_rate": 7.37925725290142e-05, "loss": 0.8447, "step": 169980 }, { "epoch": 2.9844273951438756, "grad_norm": 0.04575284195613215, "learning_rate": 7.378185815457003e-05, "loss": 0.8376, "step": 169990 }, { "epoch": 2.984602960023877, "grad_norm": 0.0461842669296962, "learning_rate": 7.377114412498499e-05, "loss": 0.8412, "step": 170000 }, { "epoch": 2.984778524903878, "grad_norm": 0.08426101401013081, "learning_rate": 7.376043044039475e-05, "loss": 0.8396, "step": 170010 }, { "epoch": 2.9849540897838795, "grad_norm": 0.05268664330701726, "learning_rate": 7.374971710093502e-05, "loss": 0.84, "step": 170020 }, { "epoch": 2.985129654663881, "grad_norm": 0.04377089839084537, "learning_rate": 7.373900410674166e-05, "loss": 0.8445, "step": 170030 }, { "epoch": 2.9853052195438825, "grad_norm": 0.049274770251911425, "learning_rate": 7.372829145795034e-05, "loss": 0.8381, "step": 170040 }, { "epoch": 2.985480784423884, "grad_norm": 0.05208854096900774, "learning_rate": 7.371757915469676e-05, "loss": 0.8396, "step": 170050 }, { "epoch": 2.985656349303885, "grad_norm": 0.06806988922789081, "learning_rate": 7.370686719711673e-05, "loss": 0.8385, "step": 170060 }, { "epoch": 2.9858319141838865, "grad_norm": 0.05169816717309718, "learning_rate": 7.369615558534591e-05, "loss": 0.8412, "step": 170070 }, { "epoch": 2.986007479063888, "grad_norm": 0.056122059173729255, "learning_rate": 7.368544431952002e-05, "loss": 0.8387, "step": 170080 }, { "epoch": 2.9861830439438894, "grad_norm": 0.05446188474568226, "learning_rate": 7.367473339977475e-05, "loss": 0.8313, "step": 170090 }, { "epoch": 2.986358608823891, "grad_norm": 0.057327158293982966, "learning_rate": 7.366402282624588e-05, "loss": 0.8387, "step": 170100 }, { "epoch": 2.9865341737038924, "grad_norm": 0.06713353679049303, "learning_rate": 7.365331259906907e-05, "loss": 0.8362, "step": 170110 }, { "epoch": 2.986709738583894, "grad_norm": 0.06257696161893787, "learning_rate": 7.364260271838004e-05, "loss": 0.8467, "step": 170120 }, { "epoch": 2.986885303463895, "grad_norm": 0.05975831895768334, "learning_rate": 7.363189318431449e-05, "loss": 0.8455, "step": 170130 }, { "epoch": 2.9870608683438964, "grad_norm": 0.052479366756728554, "learning_rate": 7.362118399700806e-05, "loss": 0.8341, "step": 170140 }, { "epoch": 2.987236433223898, "grad_norm": 0.05184763427723244, "learning_rate": 7.361047515659653e-05, "loss": 0.8403, "step": 170150 }, { "epoch": 2.9874119981038993, "grad_norm": 0.05507909512169412, "learning_rate": 7.359976666321548e-05, "loss": 0.8408, "step": 170160 }, { "epoch": 2.987587562983901, "grad_norm": 0.05227901719345902, "learning_rate": 7.358905851700066e-05, "loss": 0.8438, "step": 170170 }, { "epoch": 2.987763127863902, "grad_norm": 0.04719913347909704, "learning_rate": 7.357835071808774e-05, "loss": 0.8345, "step": 170180 }, { "epoch": 2.987938692743904, "grad_norm": 0.06729112066219092, "learning_rate": 7.356764326661231e-05, "loss": 0.8419, "step": 170190 }, { "epoch": 2.988114257623905, "grad_norm": 0.061404153393933465, "learning_rate": 7.355693616271015e-05, "loss": 0.851, "step": 170200 }, { "epoch": 2.9882898225039063, "grad_norm": 0.07972491589198283, "learning_rate": 7.354622940651686e-05, "loss": 0.8375, "step": 170210 }, { "epoch": 2.9884653873839078, "grad_norm": 0.053420208257593374, "learning_rate": 7.353552299816808e-05, "loss": 0.8488, "step": 170220 }, { "epoch": 2.9886409522639092, "grad_norm": 0.06324240837295032, "learning_rate": 7.35248169377995e-05, "loss": 0.8416, "step": 170230 }, { "epoch": 2.9888165171439107, "grad_norm": 0.06530730467911545, "learning_rate": 7.351411122554677e-05, "loss": 0.8496, "step": 170240 }, { "epoch": 2.9889920820239118, "grad_norm": 0.06575735723547085, "learning_rate": 7.350340586154553e-05, "loss": 0.8371, "step": 170250 }, { "epoch": 2.9891676469039132, "grad_norm": 0.05289449273918068, "learning_rate": 7.349270084593135e-05, "loss": 0.8377, "step": 170260 }, { "epoch": 2.9893432117839147, "grad_norm": 0.05259459612735132, "learning_rate": 7.348199617883996e-05, "loss": 0.8434, "step": 170270 }, { "epoch": 2.989518776663916, "grad_norm": 0.057690048012012045, "learning_rate": 7.347129186040697e-05, "loss": 0.8408, "step": 170280 }, { "epoch": 2.9896943415439177, "grad_norm": 0.055747281892168384, "learning_rate": 7.346058789076795e-05, "loss": 0.8352, "step": 170290 }, { "epoch": 2.9898699064239187, "grad_norm": 0.058813061095760316, "learning_rate": 7.344988427005859e-05, "loss": 0.8456, "step": 170300 }, { "epoch": 2.9900454713039206, "grad_norm": 0.05755152956809683, "learning_rate": 7.343918099841447e-05, "loss": 0.8433, "step": 170310 }, { "epoch": 2.9902210361839217, "grad_norm": 0.06166154915908936, "learning_rate": 7.342847807597122e-05, "loss": 0.8473, "step": 170320 }, { "epoch": 2.990396601063923, "grad_norm": 0.05445228341230468, "learning_rate": 7.34177755028644e-05, "loss": 0.8515, "step": 170330 }, { "epoch": 2.9905721659439246, "grad_norm": 0.055271865997678665, "learning_rate": 7.340707327922969e-05, "loss": 0.8383, "step": 170340 }, { "epoch": 2.990747730823926, "grad_norm": 0.05864934852110265, "learning_rate": 7.339637140520264e-05, "loss": 0.8487, "step": 170350 }, { "epoch": 2.9909232957039276, "grad_norm": 0.04779014075877621, "learning_rate": 7.33856698809188e-05, "loss": 0.84, "step": 170360 }, { "epoch": 2.9910988605839286, "grad_norm": 0.05021014854916971, "learning_rate": 7.337496870651388e-05, "loss": 0.8407, "step": 170370 }, { "epoch": 2.99127442546393, "grad_norm": 0.048684150471849236, "learning_rate": 7.336426788212337e-05, "loss": 0.8389, "step": 170380 }, { "epoch": 2.9914499903439316, "grad_norm": 0.04710905721558106, "learning_rate": 7.33535674078829e-05, "loss": 0.8375, "step": 170390 }, { "epoch": 2.991625555223933, "grad_norm": 0.04790921675403385, "learning_rate": 7.334286728392803e-05, "loss": 0.8383, "step": 170400 }, { "epoch": 2.9918011201039345, "grad_norm": 0.04504366733460787, "learning_rate": 7.333216751039432e-05, "loss": 0.8303, "step": 170410 }, { "epoch": 2.9919766849839355, "grad_norm": 0.048711659597662514, "learning_rate": 7.332146808741736e-05, "loss": 0.8357, "step": 170420 }, { "epoch": 2.9921522498639375, "grad_norm": 0.057032803664389085, "learning_rate": 7.331076901513268e-05, "loss": 0.8474, "step": 170430 }, { "epoch": 2.9923278147439385, "grad_norm": 0.05441315911106953, "learning_rate": 7.330007029367589e-05, "loss": 0.8379, "step": 170440 }, { "epoch": 2.99250337962394, "grad_norm": 0.05442541798045794, "learning_rate": 7.32893719231825e-05, "loss": 0.831, "step": 170450 }, { "epoch": 2.9926789445039415, "grad_norm": 0.05598315271819905, "learning_rate": 7.327867390378808e-05, "loss": 0.8423, "step": 170460 }, { "epoch": 2.992854509383943, "grad_norm": 0.052033621090318054, "learning_rate": 7.32679762356282e-05, "loss": 0.842, "step": 170470 }, { "epoch": 2.9930300742639444, "grad_norm": 0.05314491709744866, "learning_rate": 7.325727891883836e-05, "loss": 0.8384, "step": 170480 }, { "epoch": 2.9932056391439454, "grad_norm": 0.04741689102548389, "learning_rate": 7.324658195355412e-05, "loss": 0.842, "step": 170490 }, { "epoch": 2.993381204023947, "grad_norm": 0.058568567405839704, "learning_rate": 7.323588533991099e-05, "loss": 0.8423, "step": 170500 }, { "epoch": 2.9935567689039484, "grad_norm": 0.07303103940950065, "learning_rate": 7.322518907804453e-05, "loss": 0.846, "step": 170510 }, { "epoch": 2.99373233378395, "grad_norm": 0.06710055779379079, "learning_rate": 7.321449316809024e-05, "loss": 0.8477, "step": 170520 }, { "epoch": 2.9939078986639514, "grad_norm": 0.05474660746947266, "learning_rate": 7.320379761018361e-05, "loss": 0.8434, "step": 170530 }, { "epoch": 2.994083463543953, "grad_norm": 0.04858331239753577, "learning_rate": 7.319310240446024e-05, "loss": 0.8415, "step": 170540 }, { "epoch": 2.9942590284239543, "grad_norm": 0.05246367654257503, "learning_rate": 7.318240755105558e-05, "loss": 0.8444, "step": 170550 }, { "epoch": 2.9944345933039553, "grad_norm": 0.05712871727182397, "learning_rate": 7.317171305010513e-05, "loss": 0.8508, "step": 170560 }, { "epoch": 2.994610158183957, "grad_norm": 0.05709899146791603, "learning_rate": 7.316101890174445e-05, "loss": 0.8441, "step": 170570 }, { "epoch": 2.9947857230639583, "grad_norm": 0.0458033758665017, "learning_rate": 7.315032510610896e-05, "loss": 0.8384, "step": 170580 }, { "epoch": 2.9949612879439598, "grad_norm": 0.047526396344823585, "learning_rate": 7.313963166333422e-05, "loss": 0.8356, "step": 170590 }, { "epoch": 2.9951368528239612, "grad_norm": 0.04565686111293034, "learning_rate": 7.312893857355562e-05, "loss": 0.8482, "step": 170600 }, { "epoch": 2.9953124177039623, "grad_norm": 0.07362268294179275, "learning_rate": 7.311824583690877e-05, "loss": 0.8365, "step": 170610 }, { "epoch": 2.9954879825839638, "grad_norm": 0.06156580335941738, "learning_rate": 7.310755345352909e-05, "loss": 0.8485, "step": 170620 }, { "epoch": 2.9956635474639652, "grad_norm": 0.06548819389914416, "learning_rate": 7.309686142355202e-05, "loss": 0.8369, "step": 170630 }, { "epoch": 2.9958391123439667, "grad_norm": 0.05998777108796838, "learning_rate": 7.308616974711309e-05, "loss": 0.8407, "step": 170640 }, { "epoch": 2.996014677223968, "grad_norm": 0.058372120653331916, "learning_rate": 7.307547842434773e-05, "loss": 0.8513, "step": 170650 }, { "epoch": 2.9961902421039697, "grad_norm": 0.0538071059471943, "learning_rate": 7.306478745539141e-05, "loss": 0.8503, "step": 170660 }, { "epoch": 2.996365806983971, "grad_norm": 0.04647545117155671, "learning_rate": 7.305409684037958e-05, "loss": 0.8415, "step": 170670 }, { "epoch": 2.996541371863972, "grad_norm": 0.04704973853240036, "learning_rate": 7.304340657944772e-05, "loss": 0.8409, "step": 170680 }, { "epoch": 2.9967169367439737, "grad_norm": 0.09295305566176376, "learning_rate": 7.303271667273124e-05, "loss": 0.8446, "step": 170690 }, { "epoch": 2.996892501623975, "grad_norm": 0.05095672552659034, "learning_rate": 7.302202712036557e-05, "loss": 0.841, "step": 170700 }, { "epoch": 2.9970680665039766, "grad_norm": 0.07143422218423198, "learning_rate": 7.301133792248621e-05, "loss": 0.8381, "step": 170710 }, { "epoch": 2.997243631383978, "grad_norm": 0.05408948701619399, "learning_rate": 7.300064907922858e-05, "loss": 0.8404, "step": 170720 }, { "epoch": 2.997419196263979, "grad_norm": 0.048974084355588296, "learning_rate": 7.298996059072804e-05, "loss": 0.8445, "step": 170730 }, { "epoch": 2.9975947611439806, "grad_norm": 0.05714008339213229, "learning_rate": 7.297927245712013e-05, "loss": 0.8388, "step": 170740 }, { "epoch": 2.997770326023982, "grad_norm": 0.062230533687453446, "learning_rate": 7.296858467854019e-05, "loss": 0.848, "step": 170750 }, { "epoch": 2.9979458909039836, "grad_norm": 0.06247602540043573, "learning_rate": 7.295789725512365e-05, "loss": 0.8423, "step": 170760 }, { "epoch": 2.998121455783985, "grad_norm": 0.045216712972055574, "learning_rate": 7.29472101870059e-05, "loss": 0.8361, "step": 170770 }, { "epoch": 2.9982970206639865, "grad_norm": 0.045834626304271445, "learning_rate": 7.293652347432238e-05, "loss": 0.8454, "step": 170780 }, { "epoch": 2.998472585543988, "grad_norm": 0.05962768305935381, "learning_rate": 7.292583711720852e-05, "loss": 0.8396, "step": 170790 }, { "epoch": 2.998648150423989, "grad_norm": 0.04478891749787044, "learning_rate": 7.291515111579964e-05, "loss": 0.8469, "step": 170800 }, { "epoch": 2.9988237153039905, "grad_norm": 0.06671405432050598, "learning_rate": 7.290446547023121e-05, "loss": 0.8368, "step": 170810 }, { "epoch": 2.998999280183992, "grad_norm": 0.041001479979801886, "learning_rate": 7.289378018063858e-05, "loss": 0.8348, "step": 170820 }, { "epoch": 2.9991748450639935, "grad_norm": 0.05045777104600123, "learning_rate": 7.288309524715717e-05, "loss": 0.8408, "step": 170830 }, { "epoch": 2.999350409943995, "grad_norm": 0.04890461973751653, "learning_rate": 7.287241066992228e-05, "loss": 0.8396, "step": 170840 }, { "epoch": 2.999525974823996, "grad_norm": 0.06330839726894504, "learning_rate": 7.286172644906936e-05, "loss": 0.8397, "step": 170850 }, { "epoch": 2.999701539703998, "grad_norm": 0.051508673730117326, "learning_rate": 7.285104258473376e-05, "loss": 0.8364, "step": 170860 }, { "epoch": 2.999877104583999, "grad_norm": 0.05417489216685118, "learning_rate": 7.284035907705081e-05, "loss": 0.846, "step": 170870 }, { "epoch": 3.0000526694640004, "grad_norm": 0.05742736119916887, "learning_rate": 7.282967592615597e-05, "loss": 0.8438, "step": 170880 }, { "epoch": 3.000228234344002, "grad_norm": 0.054440744752247555, "learning_rate": 7.281899313218449e-05, "loss": 0.834, "step": 170890 }, { "epoch": 3.0004037992240034, "grad_norm": 0.07013086190726067, "learning_rate": 7.280831069527178e-05, "loss": 0.8455, "step": 170900 }, { "epoch": 3.000579364104005, "grad_norm": 0.05690449144935582, "learning_rate": 7.279762861555318e-05, "loss": 0.8427, "step": 170910 }, { "epoch": 3.000754928984006, "grad_norm": 0.046283970535679356, "learning_rate": 7.278694689316403e-05, "loss": 0.8493, "step": 170920 }, { "epoch": 3.0009304938640073, "grad_norm": 0.049712548707396054, "learning_rate": 7.277626552823969e-05, "loss": 0.8398, "step": 170930 }, { "epoch": 3.001106058744009, "grad_norm": 0.04937766840043774, "learning_rate": 7.276558452091542e-05, "loss": 0.8404, "step": 170940 }, { "epoch": 3.0012816236240103, "grad_norm": 0.06380492785993028, "learning_rate": 7.275490387132664e-05, "loss": 0.8391, "step": 170950 }, { "epoch": 3.0014571885040118, "grad_norm": 0.0545731133318981, "learning_rate": 7.274422357960865e-05, "loss": 0.8388, "step": 170960 }, { "epoch": 3.0016327533840133, "grad_norm": 0.04658169765322587, "learning_rate": 7.273354364589676e-05, "loss": 0.8385, "step": 170970 }, { "epoch": 3.0018083182640143, "grad_norm": 0.06300817729775568, "learning_rate": 7.272286407032628e-05, "loss": 0.8403, "step": 170980 }, { "epoch": 3.0019838831440158, "grad_norm": 0.04840269482310942, "learning_rate": 7.271218485303253e-05, "loss": 0.8454, "step": 170990 }, { "epoch": 3.0021594480240172, "grad_norm": 0.07420735955546094, "learning_rate": 7.270150599415083e-05, "loss": 0.8446, "step": 171000 }, { "epoch": 3.0023350129040187, "grad_norm": 0.05735373342971433, "learning_rate": 7.269082749381647e-05, "loss": 0.8421, "step": 171010 }, { "epoch": 3.00251057778402, "grad_norm": 0.05452477975203114, "learning_rate": 7.268014935216474e-05, "loss": 0.8415, "step": 171020 }, { "epoch": 3.0026861426640217, "grad_norm": 0.07701152673555976, "learning_rate": 7.266947156933097e-05, "loss": 0.8381, "step": 171030 }, { "epoch": 3.0028617075440227, "grad_norm": 0.06694678444100004, "learning_rate": 7.265879414545036e-05, "loss": 0.839, "step": 171040 }, { "epoch": 3.003037272424024, "grad_norm": 0.05722504674095029, "learning_rate": 7.264811708065832e-05, "loss": 0.8459, "step": 171050 }, { "epoch": 3.0032128373040257, "grad_norm": 0.04723396941863149, "learning_rate": 7.263744037509005e-05, "loss": 0.8409, "step": 171060 }, { "epoch": 3.003388402184027, "grad_norm": 0.053006148709238726, "learning_rate": 7.262676402888084e-05, "loss": 0.8397, "step": 171070 }, { "epoch": 3.0035639670640286, "grad_norm": 0.06602440888939964, "learning_rate": 7.2616088042166e-05, "loss": 0.8419, "step": 171080 }, { "epoch": 3.00373953194403, "grad_norm": 0.05210727737864016, "learning_rate": 7.260541241508074e-05, "loss": 0.8373, "step": 171090 }, { "epoch": 3.003915096824031, "grad_norm": 0.04787416627643182, "learning_rate": 7.259473714776036e-05, "loss": 0.8498, "step": 171100 }, { "epoch": 3.0040906617040326, "grad_norm": 0.04359113376153456, "learning_rate": 7.258406224034006e-05, "loss": 0.8506, "step": 171110 }, { "epoch": 3.004266226584034, "grad_norm": 0.07167907862825396, "learning_rate": 7.257338769295518e-05, "loss": 0.8469, "step": 171120 }, { "epoch": 3.0044417914640356, "grad_norm": 0.06038987906429896, "learning_rate": 7.256271350574093e-05, "loss": 0.8379, "step": 171130 }, { "epoch": 3.004617356344037, "grad_norm": 0.05548249564124799, "learning_rate": 7.255203967883252e-05, "loss": 0.8484, "step": 171140 }, { "epoch": 3.0047929212240385, "grad_norm": 0.05515496578860297, "learning_rate": 7.254136621236526e-05, "loss": 0.845, "step": 171150 }, { "epoch": 3.0049684861040395, "grad_norm": 0.0692798052040501, "learning_rate": 7.253069310647433e-05, "loss": 0.8434, "step": 171160 }, { "epoch": 3.005144050984041, "grad_norm": 0.05781575085244356, "learning_rate": 7.252002036129496e-05, "loss": 0.8374, "step": 171170 }, { "epoch": 3.0053196158640425, "grad_norm": 0.05616345538192738, "learning_rate": 7.250934797696238e-05, "loss": 0.849, "step": 171180 }, { "epoch": 3.005495180744044, "grad_norm": 0.05742974712783364, "learning_rate": 7.249867595361185e-05, "loss": 0.8356, "step": 171190 }, { "epoch": 3.0056707456240455, "grad_norm": 0.05375911241593356, "learning_rate": 7.248800429137855e-05, "loss": 0.8365, "step": 171200 }, { "epoch": 3.005846310504047, "grad_norm": 0.06169345618960183, "learning_rate": 7.247733299039766e-05, "loss": 0.8385, "step": 171210 }, { "epoch": 3.006021875384048, "grad_norm": 0.04763401624830053, "learning_rate": 7.246666205080449e-05, "loss": 0.8436, "step": 171220 }, { "epoch": 3.0061974402640494, "grad_norm": 0.06761917752551917, "learning_rate": 7.245599147273414e-05, "loss": 0.8456, "step": 171230 }, { "epoch": 3.006373005144051, "grad_norm": 0.058375944520206044, "learning_rate": 7.244532125632185e-05, "loss": 0.842, "step": 171240 }, { "epoch": 3.0065485700240524, "grad_norm": 0.05909187457216946, "learning_rate": 7.243465140170283e-05, "loss": 0.8401, "step": 171250 }, { "epoch": 3.006724134904054, "grad_norm": 0.06321761087808035, "learning_rate": 7.242398190901226e-05, "loss": 0.8436, "step": 171260 }, { "epoch": 3.0068996997840554, "grad_norm": 0.048867133083927344, "learning_rate": 7.24133127783853e-05, "loss": 0.8441, "step": 171270 }, { "epoch": 3.0070752646640564, "grad_norm": 0.05307242951729981, "learning_rate": 7.240264400995713e-05, "loss": 0.8442, "step": 171280 }, { "epoch": 3.007250829544058, "grad_norm": 0.053621635056094005, "learning_rate": 7.239197560386295e-05, "loss": 0.8403, "step": 171290 }, { "epoch": 3.0074263944240593, "grad_norm": 0.06937257148081516, "learning_rate": 7.238130756023794e-05, "loss": 0.8442, "step": 171300 }, { "epoch": 3.007601959304061, "grad_norm": 0.048170336595598925, "learning_rate": 7.237063987921721e-05, "loss": 0.845, "step": 171310 }, { "epoch": 3.0077775241840623, "grad_norm": 0.053032698649244474, "learning_rate": 7.2359972560936e-05, "loss": 0.8475, "step": 171320 }, { "epoch": 3.007953089064064, "grad_norm": 0.04762066563536456, "learning_rate": 7.234930560552943e-05, "loss": 0.8327, "step": 171330 }, { "epoch": 3.008128653944065, "grad_norm": 0.05695093665871964, "learning_rate": 7.233863901313263e-05, "loss": 0.8438, "step": 171340 }, { "epoch": 3.0083042188240663, "grad_norm": 0.048249550987154825, "learning_rate": 7.232797278388075e-05, "loss": 0.8415, "step": 171350 }, { "epoch": 3.0084797837040678, "grad_norm": 0.0719894822417738, "learning_rate": 7.2317306917909e-05, "loss": 0.8403, "step": 171360 }, { "epoch": 3.0086553485840692, "grad_norm": 0.04776238080810231, "learning_rate": 7.230664141535243e-05, "loss": 0.8367, "step": 171370 }, { "epoch": 3.0088309134640707, "grad_norm": 0.0524983991489296, "learning_rate": 7.229597627634618e-05, "loss": 0.8452, "step": 171380 }, { "epoch": 3.009006478344072, "grad_norm": 0.04526284060080202, "learning_rate": 7.228531150102547e-05, "loss": 0.838, "step": 171390 }, { "epoch": 3.0091820432240737, "grad_norm": 0.06607048784706504, "learning_rate": 7.227464708952535e-05, "loss": 0.8451, "step": 171400 }, { "epoch": 3.0093576081040747, "grad_norm": 0.04235195032095485, "learning_rate": 7.226398304198094e-05, "loss": 0.8374, "step": 171410 }, { "epoch": 3.009533172984076, "grad_norm": 0.07098243159545824, "learning_rate": 7.225331935852739e-05, "loss": 0.843, "step": 171420 }, { "epoch": 3.0097087378640777, "grad_norm": 0.10227379195254023, "learning_rate": 7.224265603929979e-05, "loss": 0.838, "step": 171430 }, { "epoch": 3.009884302744079, "grad_norm": 0.0544888541272487, "learning_rate": 7.223199308443327e-05, "loss": 0.8472, "step": 171440 }, { "epoch": 3.0100598676240806, "grad_norm": 0.06423822575947612, "learning_rate": 7.222133049406286e-05, "loss": 0.8443, "step": 171450 }, { "epoch": 3.010235432504082, "grad_norm": 0.05222744404746236, "learning_rate": 7.221066826832374e-05, "loss": 0.848, "step": 171460 }, { "epoch": 3.010410997384083, "grad_norm": 0.053549825193916406, "learning_rate": 7.220000640735099e-05, "loss": 0.8423, "step": 171470 }, { "epoch": 3.0105865622640846, "grad_norm": 0.052359154093377554, "learning_rate": 7.218934491127963e-05, "loss": 0.8475, "step": 171480 }, { "epoch": 3.010762127144086, "grad_norm": 0.043709399334888294, "learning_rate": 7.217868378024481e-05, "loss": 0.8432, "step": 171490 }, { "epoch": 3.0109376920240876, "grad_norm": 0.04786815940725118, "learning_rate": 7.216802301438161e-05, "loss": 0.8421, "step": 171500 }, { "epoch": 3.011113256904089, "grad_norm": 0.06263380414479991, "learning_rate": 7.215736261382508e-05, "loss": 0.8404, "step": 171510 }, { "epoch": 3.0112888217840905, "grad_norm": 0.05176284729224452, "learning_rate": 7.214670257871028e-05, "loss": 0.851, "step": 171520 }, { "epoch": 3.0114643866640916, "grad_norm": 0.05039396174548485, "learning_rate": 7.21360429091723e-05, "loss": 0.845, "step": 171530 }, { "epoch": 3.011639951544093, "grad_norm": 0.046737869298228756, "learning_rate": 7.212538360534617e-05, "loss": 0.8503, "step": 171540 }, { "epoch": 3.0118155164240945, "grad_norm": 0.0575551809825628, "learning_rate": 7.211472466736695e-05, "loss": 0.8429, "step": 171550 }, { "epoch": 3.011991081304096, "grad_norm": 0.057848700675114946, "learning_rate": 7.210406609536974e-05, "loss": 0.8468, "step": 171560 }, { "epoch": 3.0121666461840975, "grad_norm": 0.05866858581038805, "learning_rate": 7.209340788948955e-05, "loss": 0.8408, "step": 171570 }, { "epoch": 3.012342211064099, "grad_norm": 0.04591232141011969, "learning_rate": 7.208275004986139e-05, "loss": 0.8393, "step": 171580 }, { "epoch": 3.0125177759441, "grad_norm": 0.06020332312490891, "learning_rate": 7.207209257662037e-05, "loss": 0.8406, "step": 171590 }, { "epoch": 3.0126933408241015, "grad_norm": 0.058296126698897235, "learning_rate": 7.206143546990146e-05, "loss": 0.8404, "step": 171600 }, { "epoch": 3.012868905704103, "grad_norm": 0.08278322265125347, "learning_rate": 7.205077872983972e-05, "loss": 0.8438, "step": 171610 }, { "epoch": 3.0130444705841044, "grad_norm": 0.044582571045944555, "learning_rate": 7.204012235657012e-05, "loss": 0.8486, "step": 171620 }, { "epoch": 3.013220035464106, "grad_norm": 0.053815371859569264, "learning_rate": 7.202946635022776e-05, "loss": 0.8478, "step": 171630 }, { "epoch": 3.0133956003441074, "grad_norm": 0.052054315648345444, "learning_rate": 7.201881071094761e-05, "loss": 0.8463, "step": 171640 }, { "epoch": 3.0135711652241084, "grad_norm": 0.0576961316232121, "learning_rate": 7.200815543886467e-05, "loss": 0.8357, "step": 171650 }, { "epoch": 3.01374673010411, "grad_norm": 0.07717729838922582, "learning_rate": 7.199750053411398e-05, "loss": 0.8407, "step": 171660 }, { "epoch": 3.0139222949841113, "grad_norm": 0.06177731913046722, "learning_rate": 7.198684599683049e-05, "loss": 0.8496, "step": 171670 }, { "epoch": 3.014097859864113, "grad_norm": 0.051107954666948306, "learning_rate": 7.197619182714925e-05, "loss": 0.8444, "step": 171680 }, { "epoch": 3.0142734247441143, "grad_norm": 0.05122203871449132, "learning_rate": 7.196553802520519e-05, "loss": 0.843, "step": 171690 }, { "epoch": 3.014448989624116, "grad_norm": 0.04864713119767333, "learning_rate": 7.195488459113334e-05, "loss": 0.8486, "step": 171700 }, { "epoch": 3.014624554504117, "grad_norm": 0.05385484110137351, "learning_rate": 7.194423152506869e-05, "loss": 0.8452, "step": 171710 }, { "epoch": 3.0148001193841183, "grad_norm": 0.05351432321201021, "learning_rate": 7.193357882714612e-05, "loss": 0.8396, "step": 171720 }, { "epoch": 3.0149756842641198, "grad_norm": 0.08588695032615334, "learning_rate": 7.192292649750077e-05, "loss": 0.8454, "step": 171730 }, { "epoch": 3.0151512491441212, "grad_norm": 0.06366792725907204, "learning_rate": 7.191227453626747e-05, "loss": 0.8432, "step": 171740 }, { "epoch": 3.0153268140241227, "grad_norm": 0.05683266223105861, "learning_rate": 7.190162294358121e-05, "loss": 0.8399, "step": 171750 }, { "epoch": 3.015502378904124, "grad_norm": 0.04895540633145298, "learning_rate": 7.1890971719577e-05, "loss": 0.8405, "step": 171760 }, { "epoch": 3.0156779437841252, "grad_norm": 0.057470048599106316, "learning_rate": 7.188032086438976e-05, "loss": 0.8475, "step": 171770 }, { "epoch": 3.0158535086641267, "grad_norm": 0.08027046011449912, "learning_rate": 7.186967037815442e-05, "loss": 0.8387, "step": 171780 }, { "epoch": 3.016029073544128, "grad_norm": 0.05613393880626938, "learning_rate": 7.185902026100589e-05, "loss": 0.8439, "step": 171790 }, { "epoch": 3.0162046384241297, "grad_norm": 0.05034488390617439, "learning_rate": 7.184837051307922e-05, "loss": 0.8459, "step": 171800 }, { "epoch": 3.016380203304131, "grad_norm": 0.051801687225974356, "learning_rate": 7.183772113450928e-05, "loss": 0.8424, "step": 171810 }, { "epoch": 3.0165557681841326, "grad_norm": 0.06366488357883712, "learning_rate": 7.182707212543098e-05, "loss": 0.8457, "step": 171820 }, { "epoch": 3.0167313330641337, "grad_norm": 0.051173270081191845, "learning_rate": 7.18164234859793e-05, "loss": 0.8411, "step": 171830 }, { "epoch": 3.016906897944135, "grad_norm": 0.06017877441095334, "learning_rate": 7.180577521628909e-05, "loss": 0.8458, "step": 171840 }, { "epoch": 3.0170824628241366, "grad_norm": 0.057958741430119816, "learning_rate": 7.179512731649533e-05, "loss": 0.8382, "step": 171850 }, { "epoch": 3.017258027704138, "grad_norm": 0.051528943650910104, "learning_rate": 7.178447978673289e-05, "loss": 0.8481, "step": 171860 }, { "epoch": 3.0174335925841396, "grad_norm": 0.06901610870083762, "learning_rate": 7.177383262713669e-05, "loss": 0.8444, "step": 171870 }, { "epoch": 3.017609157464141, "grad_norm": 0.06128238154365561, "learning_rate": 7.176318583784165e-05, "loss": 0.8428, "step": 171880 }, { "epoch": 3.017784722344142, "grad_norm": 0.062326316536093135, "learning_rate": 7.17525394189826e-05, "loss": 0.8467, "step": 171890 }, { "epoch": 3.0179602872241436, "grad_norm": 0.06115650897793941, "learning_rate": 7.174189337069451e-05, "loss": 0.8431, "step": 171900 }, { "epoch": 3.018135852104145, "grad_norm": 0.05858376550323635, "learning_rate": 7.173124769311226e-05, "loss": 0.838, "step": 171910 }, { "epoch": 3.0183114169841465, "grad_norm": 0.04373776023326719, "learning_rate": 7.172060238637067e-05, "loss": 0.8493, "step": 171920 }, { "epoch": 3.018486981864148, "grad_norm": 0.04355841401362683, "learning_rate": 7.17099574506047e-05, "loss": 0.8356, "step": 171930 }, { "epoch": 3.0186625467441495, "grad_norm": 0.06144269779960712, "learning_rate": 7.169931288594917e-05, "loss": 0.8376, "step": 171940 }, { "epoch": 3.0188381116241505, "grad_norm": 0.06149665312306409, "learning_rate": 7.168866869253896e-05, "loss": 0.8416, "step": 171950 }, { "epoch": 3.019013676504152, "grad_norm": 0.044188521065432314, "learning_rate": 7.167802487050889e-05, "loss": 0.8394, "step": 171960 }, { "epoch": 3.0191892413841535, "grad_norm": 0.04673305666200695, "learning_rate": 7.166738141999391e-05, "loss": 0.8395, "step": 171970 }, { "epoch": 3.019364806264155, "grad_norm": 0.05184130773687192, "learning_rate": 7.165673834112884e-05, "loss": 0.8432, "step": 171980 }, { "epoch": 3.0195403711441564, "grad_norm": 0.0614901274198665, "learning_rate": 7.164609563404849e-05, "loss": 0.8392, "step": 171990 }, { "epoch": 3.019715936024158, "grad_norm": 0.05252103989414253, "learning_rate": 7.163545329888777e-05, "loss": 0.8376, "step": 172000 }, { "epoch": 3.019891500904159, "grad_norm": 0.05198473228854114, "learning_rate": 7.162481133578147e-05, "loss": 0.841, "step": 172010 }, { "epoch": 3.0200670657841604, "grad_norm": 0.0867712291364977, "learning_rate": 7.161416974486443e-05, "loss": 0.8418, "step": 172020 }, { "epoch": 3.020242630664162, "grad_norm": 0.07126694789426687, "learning_rate": 7.160352852627148e-05, "loss": 0.8499, "step": 172030 }, { "epoch": 3.0204181955441634, "grad_norm": 0.07217177983302062, "learning_rate": 7.159288768013747e-05, "loss": 0.8431, "step": 172040 }, { "epoch": 3.020593760424165, "grad_norm": 0.053495210461679495, "learning_rate": 7.158224720659723e-05, "loss": 0.839, "step": 172050 }, { "epoch": 3.0207693253041663, "grad_norm": 0.05748795478301729, "learning_rate": 7.157160710578551e-05, "loss": 0.8376, "step": 172060 }, { "epoch": 3.0209448901841673, "grad_norm": 0.06940956317927163, "learning_rate": 7.156096737783719e-05, "loss": 0.8442, "step": 172070 }, { "epoch": 3.021120455064169, "grad_norm": 0.04933989099311346, "learning_rate": 7.155032802288706e-05, "loss": 0.8428, "step": 172080 }, { "epoch": 3.0212960199441703, "grad_norm": 0.07548667199647713, "learning_rate": 7.153968904106991e-05, "loss": 0.8427, "step": 172090 }, { "epoch": 3.0214715848241718, "grad_norm": 0.0652787513373655, "learning_rate": 7.152905043252054e-05, "loss": 0.8386, "step": 172100 }, { "epoch": 3.0216471497041733, "grad_norm": 0.04554663715464172, "learning_rate": 7.151841219737378e-05, "loss": 0.8378, "step": 172110 }, { "epoch": 3.0218227145841747, "grad_norm": 0.052491160939757996, "learning_rate": 7.150777433576438e-05, "loss": 0.8413, "step": 172120 }, { "epoch": 3.021998279464176, "grad_norm": 0.048555715138503035, "learning_rate": 7.149713684782708e-05, "loss": 0.8451, "step": 172130 }, { "epoch": 3.0221738443441772, "grad_norm": 0.04318917019668998, "learning_rate": 7.148649973369677e-05, "loss": 0.8352, "step": 172140 }, { "epoch": 3.0223494092241787, "grad_norm": 0.05362128619887078, "learning_rate": 7.147586299350815e-05, "loss": 0.8479, "step": 172150 }, { "epoch": 3.02252497410418, "grad_norm": 0.06973730459390685, "learning_rate": 7.146522662739599e-05, "loss": 0.8439, "step": 172160 }, { "epoch": 3.0227005389841817, "grad_norm": 0.05810731040199876, "learning_rate": 7.145459063549507e-05, "loss": 0.8403, "step": 172170 }, { "epoch": 3.022876103864183, "grad_norm": 0.056031370114583215, "learning_rate": 7.144395501794018e-05, "loss": 0.8448, "step": 172180 }, { "epoch": 3.0230516687441846, "grad_norm": 0.04792246292254009, "learning_rate": 7.143331977486603e-05, "loss": 0.8457, "step": 172190 }, { "epoch": 3.0232272336241857, "grad_norm": 0.05664421900019744, "learning_rate": 7.142268490640735e-05, "loss": 0.8293, "step": 172200 }, { "epoch": 3.023402798504187, "grad_norm": 0.07839252510459502, "learning_rate": 7.141205041269896e-05, "loss": 0.8426, "step": 172210 }, { "epoch": 3.0235783633841886, "grad_norm": 0.05924131902736301, "learning_rate": 7.140141629387557e-05, "loss": 0.8482, "step": 172220 }, { "epoch": 3.02375392826419, "grad_norm": 0.053840825121746975, "learning_rate": 7.139078255007184e-05, "loss": 0.8353, "step": 172230 }, { "epoch": 3.0239294931441916, "grad_norm": 0.05157375267306251, "learning_rate": 7.138014918142261e-05, "loss": 0.8436, "step": 172240 }, { "epoch": 3.024105058024193, "grad_norm": 0.05603717100106106, "learning_rate": 7.13695161880626e-05, "loss": 0.8432, "step": 172250 }, { "epoch": 3.024280622904194, "grad_norm": 0.06653589936129839, "learning_rate": 7.135888357012645e-05, "loss": 0.8397, "step": 172260 }, { "epoch": 3.0244561877841956, "grad_norm": 0.05386793250359579, "learning_rate": 7.134825132774896e-05, "loss": 0.851, "step": 172270 }, { "epoch": 3.024631752664197, "grad_norm": 0.0533080091843935, "learning_rate": 7.133761946106481e-05, "loss": 0.8448, "step": 172280 }, { "epoch": 3.0248073175441985, "grad_norm": 0.05711837875665944, "learning_rate": 7.13269879702087e-05, "loss": 0.8405, "step": 172290 }, { "epoch": 3.0249828824242, "grad_norm": 0.057343958901438105, "learning_rate": 7.13163568553153e-05, "loss": 0.8487, "step": 172300 }, { "epoch": 3.0251584473042015, "grad_norm": 0.05986600183176731, "learning_rate": 7.130572611651938e-05, "loss": 0.8416, "step": 172310 }, { "epoch": 3.0253340121842025, "grad_norm": 0.05578259179574344, "learning_rate": 7.12950957539556e-05, "loss": 0.8383, "step": 172320 }, { "epoch": 3.025509577064204, "grad_norm": 0.06356693380948866, "learning_rate": 7.128446576775863e-05, "loss": 0.84, "step": 172330 }, { "epoch": 3.0256851419442055, "grad_norm": 0.06323896327916934, "learning_rate": 7.127383615806322e-05, "loss": 0.8536, "step": 172340 }, { "epoch": 3.025860706824207, "grad_norm": 0.053228481140856626, "learning_rate": 7.126320692500399e-05, "loss": 0.8392, "step": 172350 }, { "epoch": 3.0260362717042084, "grad_norm": 0.07768825709070243, "learning_rate": 7.125257806871562e-05, "loss": 0.8398, "step": 172360 }, { "epoch": 3.02621183658421, "grad_norm": 0.06127705314656558, "learning_rate": 7.124194958933276e-05, "loss": 0.837, "step": 172370 }, { "epoch": 3.026387401464211, "grad_norm": 0.05243373552527549, "learning_rate": 7.123132148699013e-05, "loss": 0.8435, "step": 172380 }, { "epoch": 3.0265629663442124, "grad_norm": 0.05251648133341359, "learning_rate": 7.122069376182235e-05, "loss": 0.8428, "step": 172390 }, { "epoch": 3.026738531224214, "grad_norm": 0.04693973753096576, "learning_rate": 7.121006641396407e-05, "loss": 0.842, "step": 172400 }, { "epoch": 3.0269140961042154, "grad_norm": 0.056254665679083646, "learning_rate": 7.119943944354997e-05, "loss": 0.8515, "step": 172410 }, { "epoch": 3.027089660984217, "grad_norm": 0.08051219997879495, "learning_rate": 7.118881285071471e-05, "loss": 0.853, "step": 172420 }, { "epoch": 3.0272652258642183, "grad_norm": 0.04713868061170432, "learning_rate": 7.117818663559287e-05, "loss": 0.8319, "step": 172430 }, { "epoch": 3.0274407907442193, "grad_norm": 0.06097676174743066, "learning_rate": 7.116756079831915e-05, "loss": 0.8414, "step": 172440 }, { "epoch": 3.027616355624221, "grad_norm": 0.055711766345339, "learning_rate": 7.115693533902815e-05, "loss": 0.8383, "step": 172450 }, { "epoch": 3.0277919205042223, "grad_norm": 0.06581109058639242, "learning_rate": 7.11463102578545e-05, "loss": 0.8374, "step": 172460 }, { "epoch": 3.027967485384224, "grad_norm": 0.05500091433784306, "learning_rate": 7.113568555493276e-05, "loss": 0.8413, "step": 172470 }, { "epoch": 3.0281430502642253, "grad_norm": 0.08660285193116445, "learning_rate": 7.112506123039767e-05, "loss": 0.8473, "step": 172480 }, { "epoch": 3.0283186151442267, "grad_norm": 0.06272971920377854, "learning_rate": 7.111443728438378e-05, "loss": 0.839, "step": 172490 }, { "epoch": 3.0284941800242278, "grad_norm": 0.04755037197203741, "learning_rate": 7.110381371702567e-05, "loss": 0.8438, "step": 172500 }, { "epoch": 3.0286697449042292, "grad_norm": 0.054499343450983444, "learning_rate": 7.109319052845799e-05, "loss": 0.8435, "step": 172510 }, { "epoch": 3.0288453097842307, "grad_norm": 0.05157908899243053, "learning_rate": 7.108256771881531e-05, "loss": 0.836, "step": 172520 }, { "epoch": 3.029020874664232, "grad_norm": 0.0638731669407866, "learning_rate": 7.107194528823225e-05, "loss": 0.8415, "step": 172530 }, { "epoch": 3.0291964395442337, "grad_norm": 0.0538921121706394, "learning_rate": 7.106132323684334e-05, "loss": 0.8422, "step": 172540 }, { "epoch": 3.029372004424235, "grad_norm": 0.07324206721748568, "learning_rate": 7.105070156478323e-05, "loss": 0.8428, "step": 172550 }, { "epoch": 3.029547569304236, "grad_norm": 0.04630736792881751, "learning_rate": 7.104008027218648e-05, "loss": 0.8415, "step": 172560 }, { "epoch": 3.0297231341842377, "grad_norm": 0.04668643355531951, "learning_rate": 7.10294593591876e-05, "loss": 0.8419, "step": 172570 }, { "epoch": 3.029898699064239, "grad_norm": 0.04894442682149659, "learning_rate": 7.101883882592127e-05, "loss": 0.8447, "step": 172580 }, { "epoch": 3.0300742639442406, "grad_norm": 0.04206262350029431, "learning_rate": 7.100821867252198e-05, "loss": 0.845, "step": 172590 }, { "epoch": 3.030249828824242, "grad_norm": 0.046986050088413514, "learning_rate": 7.099759889912429e-05, "loss": 0.8427, "step": 172600 }, { "epoch": 3.0304253937042436, "grad_norm": 0.05089779702812183, "learning_rate": 7.09869795058628e-05, "loss": 0.8465, "step": 172610 }, { "epoch": 3.0306009585842446, "grad_norm": 0.0583258138170358, "learning_rate": 7.097636049287202e-05, "loss": 0.8435, "step": 172620 }, { "epoch": 3.030776523464246, "grad_norm": 0.05916986742018186, "learning_rate": 7.096574186028652e-05, "loss": 0.8353, "step": 172630 }, { "epoch": 3.0309520883442476, "grad_norm": 0.05694817444877599, "learning_rate": 7.095512360824078e-05, "loss": 0.8406, "step": 172640 }, { "epoch": 3.031127653224249, "grad_norm": 0.05997310663476509, "learning_rate": 7.094450573686942e-05, "loss": 0.8406, "step": 172650 }, { "epoch": 3.0313032181042505, "grad_norm": 0.05615986337736947, "learning_rate": 7.093388824630693e-05, "loss": 0.8381, "step": 172660 }, { "epoch": 3.031478782984252, "grad_norm": 0.05079433867206173, "learning_rate": 7.092327113668782e-05, "loss": 0.8496, "step": 172670 }, { "epoch": 3.031654347864253, "grad_norm": 0.04809195516520837, "learning_rate": 7.091265440814666e-05, "loss": 0.8406, "step": 172680 }, { "epoch": 3.0318299127442545, "grad_norm": 0.05564907077020405, "learning_rate": 7.090203806081792e-05, "loss": 0.8453, "step": 172690 }, { "epoch": 3.032005477624256, "grad_norm": 0.04921245636231936, "learning_rate": 7.089142209483613e-05, "loss": 0.8363, "step": 172700 }, { "epoch": 3.0321810425042575, "grad_norm": 0.059636878060774334, "learning_rate": 7.088080651033578e-05, "loss": 0.843, "step": 172710 }, { "epoch": 3.032356607384259, "grad_norm": 0.04965137567824993, "learning_rate": 7.087019130745141e-05, "loss": 0.8491, "step": 172720 }, { "epoch": 3.0325321722642604, "grad_norm": 0.06702268071942273, "learning_rate": 7.085957648631748e-05, "loss": 0.8354, "step": 172730 }, { "epoch": 3.0327077371442615, "grad_norm": 0.05231505185436787, "learning_rate": 7.084896204706846e-05, "loss": 0.8427, "step": 172740 }, { "epoch": 3.032883302024263, "grad_norm": 0.05090390795837563, "learning_rate": 7.083834798983889e-05, "loss": 0.8384, "step": 172750 }, { "epoch": 3.0330588669042644, "grad_norm": 0.058114870849432974, "learning_rate": 7.082773431476326e-05, "loss": 0.8439, "step": 172760 }, { "epoch": 3.033234431784266, "grad_norm": 0.057083751559024315, "learning_rate": 7.081712102197598e-05, "loss": 0.8479, "step": 172770 }, { "epoch": 3.0334099966642674, "grad_norm": 0.05363797974967333, "learning_rate": 7.080650811161157e-05, "loss": 0.8409, "step": 172780 }, { "epoch": 3.033585561544269, "grad_norm": 0.05320532278770529, "learning_rate": 7.07958955838045e-05, "loss": 0.84, "step": 172790 }, { "epoch": 3.03376112642427, "grad_norm": 0.047268831620782444, "learning_rate": 7.078528343868922e-05, "loss": 0.8439, "step": 172800 }, { "epoch": 3.0339366913042713, "grad_norm": 0.0601084984745601, "learning_rate": 7.077467167640015e-05, "loss": 0.8376, "step": 172810 }, { "epoch": 3.034112256184273, "grad_norm": 0.0461607022726688, "learning_rate": 7.076406029707184e-05, "loss": 0.8456, "step": 172820 }, { "epoch": 3.0342878210642743, "grad_norm": 0.06932266221572474, "learning_rate": 7.075344930083866e-05, "loss": 0.8432, "step": 172830 }, { "epoch": 3.034463385944276, "grad_norm": 0.04797413783053031, "learning_rate": 7.074283868783506e-05, "loss": 0.8452, "step": 172840 }, { "epoch": 3.0346389508242773, "grad_norm": 0.05329475115306119, "learning_rate": 7.073222845819552e-05, "loss": 0.8439, "step": 172850 }, { "epoch": 3.0348145157042783, "grad_norm": 0.04780938442012379, "learning_rate": 7.072161861205443e-05, "loss": 0.8442, "step": 172860 }, { "epoch": 3.0349900805842798, "grad_norm": 0.07055280482767992, "learning_rate": 7.071100914954625e-05, "loss": 0.8517, "step": 172870 }, { "epoch": 3.0351656454642812, "grad_norm": 0.04276490135469776, "learning_rate": 7.070040007080537e-05, "loss": 0.846, "step": 172880 }, { "epoch": 3.0353412103442827, "grad_norm": 0.05657317176390512, "learning_rate": 7.068979137596622e-05, "loss": 0.8408, "step": 172890 }, { "epoch": 3.035516775224284, "grad_norm": 0.06521364471169394, "learning_rate": 7.067918306516325e-05, "loss": 0.8466, "step": 172900 }, { "epoch": 3.0356923401042857, "grad_norm": 0.0543013284594152, "learning_rate": 7.066857513853079e-05, "loss": 0.8402, "step": 172910 }, { "epoch": 3.035867904984287, "grad_norm": 0.07640849931290362, "learning_rate": 7.065796759620334e-05, "loss": 0.8454, "step": 172920 }, { "epoch": 3.036043469864288, "grad_norm": 0.05345044512660462, "learning_rate": 7.064736043831525e-05, "loss": 0.8484, "step": 172930 }, { "epoch": 3.0362190347442897, "grad_norm": 0.04830476638669778, "learning_rate": 7.063675366500092e-05, "loss": 0.8411, "step": 172940 }, { "epoch": 3.036394599624291, "grad_norm": 0.04921905324757683, "learning_rate": 7.062614727639474e-05, "loss": 0.8456, "step": 172950 }, { "epoch": 3.0365701645042926, "grad_norm": 0.043879005400671224, "learning_rate": 7.06155412726311e-05, "loss": 0.8519, "step": 172960 }, { "epoch": 3.036745729384294, "grad_norm": 0.07406466257141797, "learning_rate": 7.060493565384438e-05, "loss": 0.8451, "step": 172970 }, { "epoch": 3.0369212942642956, "grad_norm": 0.06463953759465707, "learning_rate": 7.059433042016891e-05, "loss": 0.8401, "step": 172980 }, { "epoch": 3.0370968591442966, "grad_norm": 0.061753542796278475, "learning_rate": 7.058372557173914e-05, "loss": 0.8312, "step": 172990 }, { "epoch": 3.037272424024298, "grad_norm": 0.0564265341997786, "learning_rate": 7.05731211086894e-05, "loss": 0.8398, "step": 173000 }, { "epoch": 3.0374479889042996, "grad_norm": 0.060578271739294, "learning_rate": 7.056251703115402e-05, "loss": 0.8447, "step": 173010 }, { "epoch": 3.037623553784301, "grad_norm": 0.06502500628468491, "learning_rate": 7.05519133392674e-05, "loss": 0.8423, "step": 173020 }, { "epoch": 3.0377991186643025, "grad_norm": 0.05420499615889656, "learning_rate": 7.054131003316387e-05, "loss": 0.8342, "step": 173030 }, { "epoch": 3.037974683544304, "grad_norm": 0.05161584000214233, "learning_rate": 7.05307071129778e-05, "loss": 0.8398, "step": 173040 }, { "epoch": 3.038150248424305, "grad_norm": 0.05058747075460645, "learning_rate": 7.052010457884346e-05, "loss": 0.8419, "step": 173050 }, { "epoch": 3.0383258133043065, "grad_norm": 0.05348940316953709, "learning_rate": 7.050950243089528e-05, "loss": 0.8412, "step": 173060 }, { "epoch": 3.038501378184308, "grad_norm": 0.06461307732056834, "learning_rate": 7.049890066926755e-05, "loss": 0.8359, "step": 173070 }, { "epoch": 3.0386769430643095, "grad_norm": 0.06632527936374245, "learning_rate": 7.048829929409453e-05, "loss": 0.8373, "step": 173080 }, { "epoch": 3.038852507944311, "grad_norm": 0.05455534089937088, "learning_rate": 7.047769830551067e-05, "loss": 0.8408, "step": 173090 }, { "epoch": 3.0390280728243124, "grad_norm": 0.048241723224340546, "learning_rate": 7.046709770365021e-05, "loss": 0.8457, "step": 173100 }, { "epoch": 3.0392036377043135, "grad_norm": 0.0655389397385138, "learning_rate": 7.045649748864745e-05, "loss": 0.8448, "step": 173110 }, { "epoch": 3.039379202584315, "grad_norm": 0.07176156771966735, "learning_rate": 7.044589766063675e-05, "loss": 0.8495, "step": 173120 }, { "epoch": 3.0395547674643164, "grad_norm": 0.06246732258535992, "learning_rate": 7.043529821975237e-05, "loss": 0.8365, "step": 173130 }, { "epoch": 3.039730332344318, "grad_norm": 0.046905147865942194, "learning_rate": 7.042469916612864e-05, "loss": 0.8501, "step": 173140 }, { "epoch": 3.0399058972243194, "grad_norm": 0.05967482271792556, "learning_rate": 7.041410049989978e-05, "loss": 0.8357, "step": 173150 }, { "epoch": 3.040081462104321, "grad_norm": 0.04903925129634139, "learning_rate": 7.040350222120017e-05, "loss": 0.8478, "step": 173160 }, { "epoch": 3.040257026984322, "grad_norm": 0.04946229900998376, "learning_rate": 7.039290433016404e-05, "loss": 0.8388, "step": 173170 }, { "epoch": 3.0404325918643234, "grad_norm": 0.05485817701394587, "learning_rate": 7.038230682692567e-05, "loss": 0.844, "step": 173180 }, { "epoch": 3.040608156744325, "grad_norm": 0.05003868847570746, "learning_rate": 7.037170971161936e-05, "loss": 0.8431, "step": 173190 }, { "epoch": 3.0407837216243263, "grad_norm": 0.06397365191101874, "learning_rate": 7.036111298437935e-05, "loss": 0.8437, "step": 173200 }, { "epoch": 3.040959286504328, "grad_norm": 0.06188433592504548, "learning_rate": 7.035051664533993e-05, "loss": 0.8458, "step": 173210 }, { "epoch": 3.0411348513843293, "grad_norm": 0.05308355829870945, "learning_rate": 7.033992069463528e-05, "loss": 0.8419, "step": 173220 }, { "epoch": 3.0413104162643303, "grad_norm": 0.06625264467141226, "learning_rate": 7.032932513239977e-05, "loss": 0.8421, "step": 173230 }, { "epoch": 3.0414859811443318, "grad_norm": 0.05299396195324524, "learning_rate": 7.031872995876757e-05, "loss": 0.8475, "step": 173240 }, { "epoch": 3.0416615460243333, "grad_norm": 0.07670345444668518, "learning_rate": 7.03081351738729e-05, "loss": 0.837, "step": 173250 }, { "epoch": 3.0418371109043347, "grad_norm": 0.06299600949857569, "learning_rate": 7.029754077785008e-05, "loss": 0.8429, "step": 173260 }, { "epoch": 3.042012675784336, "grad_norm": 0.07997691448158156, "learning_rate": 7.02869467708333e-05, "loss": 0.8404, "step": 173270 }, { "epoch": 3.0421882406643377, "grad_norm": 0.04708935134376923, "learning_rate": 7.027635315295679e-05, "loss": 0.8348, "step": 173280 }, { "epoch": 3.0423638055443387, "grad_norm": 0.055265539765142514, "learning_rate": 7.026575992435479e-05, "loss": 0.8427, "step": 173290 }, { "epoch": 3.04253937042434, "grad_norm": 0.057410297703122365, "learning_rate": 7.025516708516149e-05, "loss": 0.8468, "step": 173300 }, { "epoch": 3.0427149353043417, "grad_norm": 0.05845430531114512, "learning_rate": 7.02445746355111e-05, "loss": 0.853, "step": 173310 }, { "epoch": 3.042890500184343, "grad_norm": 0.04977321702769646, "learning_rate": 7.023398257553782e-05, "loss": 0.8406, "step": 173320 }, { "epoch": 3.0430660650643446, "grad_norm": 0.07370513848652244, "learning_rate": 7.022339090537592e-05, "loss": 0.8435, "step": 173330 }, { "epoch": 3.043241629944346, "grad_norm": 0.051193298554062905, "learning_rate": 7.021279962515956e-05, "loss": 0.8421, "step": 173340 }, { "epoch": 3.043417194824347, "grad_norm": 0.04826401085479484, "learning_rate": 7.02022087350229e-05, "loss": 0.8357, "step": 173350 }, { "epoch": 3.0435927597043486, "grad_norm": 0.060832224072190764, "learning_rate": 7.019161823510018e-05, "loss": 0.8359, "step": 173360 }, { "epoch": 3.04376832458435, "grad_norm": 0.05087403071368211, "learning_rate": 7.018102812552556e-05, "loss": 0.839, "step": 173370 }, { "epoch": 3.0439438894643516, "grad_norm": 0.07831284944984315, "learning_rate": 7.017043840643323e-05, "loss": 0.8342, "step": 173380 }, { "epoch": 3.044119454344353, "grad_norm": 0.04225407351434069, "learning_rate": 7.015984907795731e-05, "loss": 0.8431, "step": 173390 }, { "epoch": 3.0442950192243545, "grad_norm": 0.05024200982455116, "learning_rate": 7.014926014023205e-05, "loss": 0.8436, "step": 173400 }, { "epoch": 3.0444705841043556, "grad_norm": 0.05069432546986197, "learning_rate": 7.013867159339157e-05, "loss": 0.8339, "step": 173410 }, { "epoch": 3.044646148984357, "grad_norm": 0.048480662861661, "learning_rate": 7.012808343757001e-05, "loss": 0.8257, "step": 173420 }, { "epoch": 3.0448217138643585, "grad_norm": 0.05210376152918246, "learning_rate": 7.011749567290157e-05, "loss": 0.8458, "step": 173430 }, { "epoch": 3.04499727874436, "grad_norm": 0.05657730404229541, "learning_rate": 7.010690829952039e-05, "loss": 0.8388, "step": 173440 }, { "epoch": 3.0451728436243615, "grad_norm": 0.04813330064381283, "learning_rate": 7.009632131756059e-05, "loss": 0.8385, "step": 173450 }, { "epoch": 3.045348408504363, "grad_norm": 0.05255122886194093, "learning_rate": 7.008573472715633e-05, "loss": 0.8389, "step": 173460 }, { "epoch": 3.045523973384364, "grad_norm": 0.039849894979013235, "learning_rate": 7.007514852844174e-05, "loss": 0.8411, "step": 173470 }, { "epoch": 3.0456995382643655, "grad_norm": 0.05555755551858423, "learning_rate": 7.006456272155094e-05, "loss": 0.8373, "step": 173480 }, { "epoch": 3.045875103144367, "grad_norm": 0.06562046001620803, "learning_rate": 7.005397730661801e-05, "loss": 0.8477, "step": 173490 }, { "epoch": 3.0460506680243684, "grad_norm": 0.06519058971802054, "learning_rate": 7.004339228377719e-05, "loss": 0.8416, "step": 173500 }, { "epoch": 3.04622623290437, "grad_norm": 0.05326189921297259, "learning_rate": 7.003280765316248e-05, "loss": 0.8454, "step": 173510 }, { "epoch": 3.0464017977843714, "grad_norm": 0.08044783269062097, "learning_rate": 7.002222341490805e-05, "loss": 0.844, "step": 173520 }, { "epoch": 3.0465773626643724, "grad_norm": 0.06259262853439787, "learning_rate": 7.001163956914799e-05, "loss": 0.8499, "step": 173530 }, { "epoch": 3.046752927544374, "grad_norm": 0.06547296510287215, "learning_rate": 7.00010561160164e-05, "loss": 0.8471, "step": 173540 }, { "epoch": 3.0469284924243754, "grad_norm": 0.06467025080221318, "learning_rate": 6.999047305564736e-05, "loss": 0.8466, "step": 173550 }, { "epoch": 3.047104057304377, "grad_norm": 0.08015807254613085, "learning_rate": 6.997989038817495e-05, "loss": 0.8426, "step": 173560 }, { "epoch": 3.0472796221843783, "grad_norm": 0.0744323800105358, "learning_rate": 6.99693081137333e-05, "loss": 0.8403, "step": 173570 }, { "epoch": 3.04745518706438, "grad_norm": 0.08763269363198892, "learning_rate": 6.995872623245648e-05, "loss": 0.8348, "step": 173580 }, { "epoch": 3.0476307519443813, "grad_norm": 0.05426240818597345, "learning_rate": 6.994814474447848e-05, "loss": 0.8446, "step": 173590 }, { "epoch": 3.0478063168243823, "grad_norm": 0.052981154520635856, "learning_rate": 6.993756364993348e-05, "loss": 0.8414, "step": 173600 }, { "epoch": 3.0479818817043838, "grad_norm": 0.05994469881455825, "learning_rate": 6.992698294895551e-05, "loss": 0.8417, "step": 173610 }, { "epoch": 3.0481574465843853, "grad_norm": 0.059044249704652094, "learning_rate": 6.99164026416786e-05, "loss": 0.8447, "step": 173620 }, { "epoch": 3.0483330114643867, "grad_norm": 0.05003088962790335, "learning_rate": 6.990582272823685e-05, "loss": 0.8518, "step": 173630 }, { "epoch": 3.048508576344388, "grad_norm": 0.04738719253185804, "learning_rate": 6.989524320876428e-05, "loss": 0.8378, "step": 173640 }, { "epoch": 3.0486841412243897, "grad_norm": 0.050650855342271865, "learning_rate": 6.988466408339493e-05, "loss": 0.8438, "step": 173650 }, { "epoch": 3.0488597061043907, "grad_norm": 0.06603929581385094, "learning_rate": 6.987408535226283e-05, "loss": 0.8444, "step": 173660 }, { "epoch": 3.049035270984392, "grad_norm": 0.055592158154153345, "learning_rate": 6.986350701550206e-05, "loss": 0.8427, "step": 173670 }, { "epoch": 3.0492108358643937, "grad_norm": 0.04816850471345703, "learning_rate": 6.985292907324663e-05, "loss": 0.8431, "step": 173680 }, { "epoch": 3.049386400744395, "grad_norm": 0.05013971255705688, "learning_rate": 6.984235152563053e-05, "loss": 0.8496, "step": 173690 }, { "epoch": 3.0495619656243966, "grad_norm": 0.05964723107243261, "learning_rate": 6.983177437278783e-05, "loss": 0.8391, "step": 173700 }, { "epoch": 3.049737530504398, "grad_norm": 0.0578709966732916, "learning_rate": 6.982119761485254e-05, "loss": 0.8475, "step": 173710 }, { "epoch": 3.049913095384399, "grad_norm": 0.04942109564949152, "learning_rate": 6.981062125195863e-05, "loss": 0.8387, "step": 173720 }, { "epoch": 3.0500886602644006, "grad_norm": 0.06842406417579507, "learning_rate": 6.980004528424012e-05, "loss": 0.8489, "step": 173730 }, { "epoch": 3.050264225144402, "grad_norm": 0.07206835716857488, "learning_rate": 6.978946971183101e-05, "loss": 0.8478, "step": 173740 }, { "epoch": 3.0504397900244036, "grad_norm": 0.05962457851345982, "learning_rate": 6.977889453486534e-05, "loss": 0.8406, "step": 173750 }, { "epoch": 3.050615354904405, "grad_norm": 0.06967525239292123, "learning_rate": 6.976831975347698e-05, "loss": 0.8376, "step": 173760 }, { "epoch": 3.0507909197844065, "grad_norm": 0.043820194536863664, "learning_rate": 6.975774536780005e-05, "loss": 0.8429, "step": 173770 }, { "epoch": 3.0509664846644076, "grad_norm": 0.05341571035611082, "learning_rate": 6.974717137796849e-05, "loss": 0.841, "step": 173780 }, { "epoch": 3.051142049544409, "grad_norm": 0.04986556389563427, "learning_rate": 6.973659778411622e-05, "loss": 0.8492, "step": 173790 }, { "epoch": 3.0513176144244105, "grad_norm": 0.05773644246394325, "learning_rate": 6.972602458637728e-05, "loss": 0.8453, "step": 173800 }, { "epoch": 3.051493179304412, "grad_norm": 0.04607355626680451, "learning_rate": 6.971545178488559e-05, "loss": 0.8409, "step": 173810 }, { "epoch": 3.0516687441844135, "grad_norm": 0.051457176268794036, "learning_rate": 6.970487937977513e-05, "loss": 0.8462, "step": 173820 }, { "epoch": 3.051844309064415, "grad_norm": 0.06536731029414364, "learning_rate": 6.96943073711798e-05, "loss": 0.8377, "step": 173830 }, { "epoch": 3.052019873944416, "grad_norm": 0.05826615137146788, "learning_rate": 6.968373575923363e-05, "loss": 0.8364, "step": 173840 }, { "epoch": 3.0521954388244175, "grad_norm": 0.07065246489473959, "learning_rate": 6.967316454407053e-05, "loss": 0.8443, "step": 173850 }, { "epoch": 3.052371003704419, "grad_norm": 0.04763790920865287, "learning_rate": 6.966259372582443e-05, "loss": 0.8308, "step": 173860 }, { "epoch": 3.0525465685844204, "grad_norm": 0.04769109975511524, "learning_rate": 6.965202330462928e-05, "loss": 0.8426, "step": 173870 }, { "epoch": 3.052722133464422, "grad_norm": 0.048640788517954024, "learning_rate": 6.964145328061902e-05, "loss": 0.8399, "step": 173880 }, { "epoch": 3.0528976983444234, "grad_norm": 0.0498953073140878, "learning_rate": 6.963088365392755e-05, "loss": 0.849, "step": 173890 }, { "epoch": 3.0530732632244244, "grad_norm": 0.04868965831044402, "learning_rate": 6.962031442468877e-05, "loss": 0.8413, "step": 173900 }, { "epoch": 3.053248828104426, "grad_norm": 0.05447753779115411, "learning_rate": 6.960974559303663e-05, "loss": 0.839, "step": 173910 }, { "epoch": 3.0534243929844274, "grad_norm": 0.05274451112376426, "learning_rate": 6.959917715910505e-05, "loss": 0.8394, "step": 173920 }, { "epoch": 3.053599957864429, "grad_norm": 0.07299483566835412, "learning_rate": 6.958860912302788e-05, "loss": 0.8464, "step": 173930 }, { "epoch": 3.0537755227444303, "grad_norm": 0.04719657125196367, "learning_rate": 6.957804148493908e-05, "loss": 0.843, "step": 173940 }, { "epoch": 3.053951087624432, "grad_norm": 0.05102452316362669, "learning_rate": 6.956747424497251e-05, "loss": 0.8512, "step": 173950 }, { "epoch": 3.054126652504433, "grad_norm": 0.051632547116908, "learning_rate": 6.955690740326205e-05, "loss": 0.8398, "step": 173960 }, { "epoch": 3.0543022173844343, "grad_norm": 0.05136740942668013, "learning_rate": 6.954634095994162e-05, "loss": 0.8486, "step": 173970 }, { "epoch": 3.054477782264436, "grad_norm": 0.06283400321867234, "learning_rate": 6.953577491514508e-05, "loss": 0.8408, "step": 173980 }, { "epoch": 3.0546533471444373, "grad_norm": 0.05829613338066152, "learning_rate": 6.952520926900631e-05, "loss": 0.8463, "step": 173990 }, { "epoch": 3.0548289120244387, "grad_norm": 0.04128316142372947, "learning_rate": 6.951464402165911e-05, "loss": 0.8356, "step": 174000 }, { "epoch": 3.05500447690444, "grad_norm": 0.046411070129892984, "learning_rate": 6.950407917323746e-05, "loss": 0.8424, "step": 174010 }, { "epoch": 3.0551800417844412, "grad_norm": 0.06371364872535651, "learning_rate": 6.949351472387516e-05, "loss": 0.8427, "step": 174020 }, { "epoch": 3.0553556066644427, "grad_norm": 0.05325020547797375, "learning_rate": 6.948295067370604e-05, "loss": 0.8481, "step": 174030 }, { "epoch": 3.055531171544444, "grad_norm": 0.06677676058702173, "learning_rate": 6.947238702286401e-05, "loss": 0.8423, "step": 174040 }, { "epoch": 3.0557067364244457, "grad_norm": 0.04529182246034823, "learning_rate": 6.946182377148285e-05, "loss": 0.8424, "step": 174050 }, { "epoch": 3.055882301304447, "grad_norm": 0.06733486078696131, "learning_rate": 6.945126091969645e-05, "loss": 0.8432, "step": 174060 }, { "epoch": 3.0560578661844486, "grad_norm": 0.05619954467889164, "learning_rate": 6.94406984676386e-05, "loss": 0.845, "step": 174070 }, { "epoch": 3.0562334310644497, "grad_norm": 0.05944784101518529, "learning_rate": 6.943013641544317e-05, "loss": 0.8438, "step": 174080 }, { "epoch": 3.056408995944451, "grad_norm": 0.06389497997601021, "learning_rate": 6.941957476324395e-05, "loss": 0.8371, "step": 174090 }, { "epoch": 3.0565845608244526, "grad_norm": 0.07560146476550507, "learning_rate": 6.940901351117475e-05, "loss": 0.8346, "step": 174100 }, { "epoch": 3.056760125704454, "grad_norm": 0.04792228435811361, "learning_rate": 6.939845265936944e-05, "loss": 0.8285, "step": 174110 }, { "epoch": 3.0569356905844556, "grad_norm": 0.056535474201700416, "learning_rate": 6.938789220796177e-05, "loss": 0.8362, "step": 174120 }, { "epoch": 3.057111255464457, "grad_norm": 0.046200262614949875, "learning_rate": 6.937733215708555e-05, "loss": 0.8455, "step": 174130 }, { "epoch": 3.057286820344458, "grad_norm": 0.0511625977841541, "learning_rate": 6.936677250687462e-05, "loss": 0.8383, "step": 174140 }, { "epoch": 3.0574623852244596, "grad_norm": 0.05199580670713242, "learning_rate": 6.935621325746275e-05, "loss": 0.851, "step": 174150 }, { "epoch": 3.057637950104461, "grad_norm": 0.06118858024287419, "learning_rate": 6.93456544089837e-05, "loss": 0.8403, "step": 174160 }, { "epoch": 3.0578135149844625, "grad_norm": 0.10993807774464165, "learning_rate": 6.933509596157125e-05, "loss": 0.8367, "step": 174170 }, { "epoch": 3.057989079864464, "grad_norm": 0.06633375408116819, "learning_rate": 6.932453791535923e-05, "loss": 0.8373, "step": 174180 }, { "epoch": 3.0581646447444655, "grad_norm": 0.04595142975735656, "learning_rate": 6.931398027048141e-05, "loss": 0.8426, "step": 174190 }, { "epoch": 3.0583402096244665, "grad_norm": 0.05399029337277553, "learning_rate": 6.93034230270715e-05, "loss": 0.8406, "step": 174200 }, { "epoch": 3.058515774504468, "grad_norm": 0.05043894794795184, "learning_rate": 6.929286618526331e-05, "loss": 0.8438, "step": 174210 }, { "epoch": 3.0586913393844695, "grad_norm": 0.05178264990659348, "learning_rate": 6.92823097451906e-05, "loss": 0.8431, "step": 174220 }, { "epoch": 3.058866904264471, "grad_norm": 0.0794970105199614, "learning_rate": 6.927175370698709e-05, "loss": 0.8484, "step": 174230 }, { "epoch": 3.0590424691444724, "grad_norm": 0.0531910872428408, "learning_rate": 6.926119807078651e-05, "loss": 0.851, "step": 174240 }, { "epoch": 3.059218034024474, "grad_norm": 0.05309814249247087, "learning_rate": 6.925064283672268e-05, "loss": 0.8401, "step": 174250 }, { "epoch": 3.059393598904475, "grad_norm": 0.057134115953952826, "learning_rate": 6.924008800492927e-05, "loss": 0.8343, "step": 174260 }, { "epoch": 3.0595691637844764, "grad_norm": 0.05321522419768154, "learning_rate": 6.922953357553999e-05, "loss": 0.8331, "step": 174270 }, { "epoch": 3.059744728664478, "grad_norm": 0.048069267737691004, "learning_rate": 6.921897954868866e-05, "loss": 0.8378, "step": 174280 }, { "epoch": 3.0599202935444794, "grad_norm": 0.04061403360167974, "learning_rate": 6.920842592450895e-05, "loss": 0.837, "step": 174290 }, { "epoch": 3.060095858424481, "grad_norm": 0.05095493079812211, "learning_rate": 6.919787270313455e-05, "loss": 0.8496, "step": 174300 }, { "epoch": 3.0602714233044823, "grad_norm": 0.0439606391573628, "learning_rate": 6.918731988469923e-05, "loss": 0.8464, "step": 174310 }, { "epoch": 3.0604469881844834, "grad_norm": 0.051748742201727246, "learning_rate": 6.917676746933666e-05, "loss": 0.8479, "step": 174320 }, { "epoch": 3.060622553064485, "grad_norm": 0.055651721237335514, "learning_rate": 6.916621545718053e-05, "loss": 0.8468, "step": 174330 }, { "epoch": 3.0607981179444863, "grad_norm": 0.048377611233925644, "learning_rate": 6.915566384836451e-05, "loss": 0.84, "step": 174340 }, { "epoch": 3.060973682824488, "grad_norm": 0.053292743521563685, "learning_rate": 6.91451126430224e-05, "loss": 0.8481, "step": 174350 }, { "epoch": 3.0611492477044893, "grad_norm": 0.058338226179061564, "learning_rate": 6.913456184128779e-05, "loss": 0.8391, "step": 174360 }, { "epoch": 3.0613248125844907, "grad_norm": 0.04803313050527189, "learning_rate": 6.912401144329438e-05, "loss": 0.8412, "step": 174370 }, { "epoch": 3.061500377464492, "grad_norm": 0.04499671751068121, "learning_rate": 6.911346144917587e-05, "loss": 0.8522, "step": 174380 }, { "epoch": 3.0616759423444933, "grad_norm": 0.048902781298086596, "learning_rate": 6.910291185906592e-05, "loss": 0.8452, "step": 174390 }, { "epoch": 3.0618515072244947, "grad_norm": 0.04797037196910991, "learning_rate": 6.909236267309819e-05, "loss": 0.8425, "step": 174400 }, { "epoch": 3.062027072104496, "grad_norm": 0.05688451346478783, "learning_rate": 6.908181389140631e-05, "loss": 0.8465, "step": 174410 }, { "epoch": 3.0622026369844977, "grad_norm": 0.0481924336261629, "learning_rate": 6.9071265514124e-05, "loss": 0.8479, "step": 174420 }, { "epoch": 3.062378201864499, "grad_norm": 0.05666547797694573, "learning_rate": 6.906071754138485e-05, "loss": 0.8338, "step": 174430 }, { "epoch": 3.0625537667445006, "grad_norm": 0.06354938452845219, "learning_rate": 6.905016997332252e-05, "loss": 0.843, "step": 174440 }, { "epoch": 3.0627293316245017, "grad_norm": 0.05491407511581481, "learning_rate": 6.903962281007069e-05, "loss": 0.8468, "step": 174450 }, { "epoch": 3.062904896504503, "grad_norm": 0.04309427314161035, "learning_rate": 6.902907605176296e-05, "loss": 0.8432, "step": 174460 }, { "epoch": 3.0630804613845046, "grad_norm": 0.04657696617423997, "learning_rate": 6.901852969853294e-05, "loss": 0.84, "step": 174470 }, { "epoch": 3.063256026264506, "grad_norm": 0.04825358296349173, "learning_rate": 6.90079837505143e-05, "loss": 0.8478, "step": 174480 }, { "epoch": 3.0634315911445076, "grad_norm": 0.06518439805039182, "learning_rate": 6.899743820784065e-05, "loss": 0.8393, "step": 174490 }, { "epoch": 3.063607156024509, "grad_norm": 0.045205914944833815, "learning_rate": 6.898689307064557e-05, "loss": 0.8409, "step": 174500 }, { "epoch": 3.06378272090451, "grad_norm": 0.05953694022558826, "learning_rate": 6.897634833906266e-05, "loss": 0.8336, "step": 174510 }, { "epoch": 3.0639582857845116, "grad_norm": 0.049127981059010895, "learning_rate": 6.896580401322559e-05, "loss": 0.845, "step": 174520 }, { "epoch": 3.064133850664513, "grad_norm": 0.06665356936078108, "learning_rate": 6.895526009326793e-05, "loss": 0.8357, "step": 174530 }, { "epoch": 3.0643094155445145, "grad_norm": 0.050861410333385995, "learning_rate": 6.894471657932325e-05, "loss": 0.8454, "step": 174540 }, { "epoch": 3.064484980424516, "grad_norm": 0.06374390076441983, "learning_rate": 6.893417347152516e-05, "loss": 0.8381, "step": 174550 }, { "epoch": 3.0646605453045175, "grad_norm": 0.04712434521444203, "learning_rate": 6.892363077000725e-05, "loss": 0.8421, "step": 174560 }, { "epoch": 3.0648361101845185, "grad_norm": 0.0537881767744789, "learning_rate": 6.891308847490307e-05, "loss": 0.8372, "step": 174570 }, { "epoch": 3.06501167506452, "grad_norm": 0.05324810737999309, "learning_rate": 6.890254658634621e-05, "loss": 0.8483, "step": 174580 }, { "epoch": 3.0651872399445215, "grad_norm": 0.061145040453798684, "learning_rate": 6.889200510447025e-05, "loss": 0.849, "step": 174590 }, { "epoch": 3.065362804824523, "grad_norm": 0.05605425793790212, "learning_rate": 6.888146402940872e-05, "loss": 0.8401, "step": 174600 }, { "epoch": 3.0655383697045244, "grad_norm": 0.05553808506131313, "learning_rate": 6.887092336129518e-05, "loss": 0.8379, "step": 174610 }, { "epoch": 3.065713934584526, "grad_norm": 0.05856780217924377, "learning_rate": 6.886038310026322e-05, "loss": 0.8371, "step": 174620 }, { "epoch": 3.065889499464527, "grad_norm": 0.07153262635977008, "learning_rate": 6.884984324644637e-05, "loss": 0.8367, "step": 174630 }, { "epoch": 3.0660650643445284, "grad_norm": 0.05428332975906769, "learning_rate": 6.883930379997818e-05, "loss": 0.8409, "step": 174640 }, { "epoch": 3.06624062922453, "grad_norm": 0.06646132101483684, "learning_rate": 6.882876476099215e-05, "loss": 0.8282, "step": 174650 }, { "epoch": 3.0664161941045314, "grad_norm": 0.05980256004690444, "learning_rate": 6.881822612962185e-05, "loss": 0.845, "step": 174660 }, { "epoch": 3.066591758984533, "grad_norm": 0.07475236818628811, "learning_rate": 6.880768790600079e-05, "loss": 0.8404, "step": 174670 }, { "epoch": 3.0667673238645343, "grad_norm": 0.058345500722714494, "learning_rate": 6.879715009026246e-05, "loss": 0.8471, "step": 174680 }, { "epoch": 3.0669428887445354, "grad_norm": 0.05122266154897929, "learning_rate": 6.878661268254045e-05, "loss": 0.8395, "step": 174690 }, { "epoch": 3.067118453624537, "grad_norm": 0.051580443367627876, "learning_rate": 6.877607568296823e-05, "loss": 0.8423, "step": 174700 }, { "epoch": 3.0672940185045383, "grad_norm": 0.06784917626818288, "learning_rate": 6.876553909167929e-05, "loss": 0.8457, "step": 174710 }, { "epoch": 3.06746958338454, "grad_norm": 0.05312945720232359, "learning_rate": 6.875500290880716e-05, "loss": 0.844, "step": 174720 }, { "epoch": 3.0676451482645413, "grad_norm": 0.056908426585540135, "learning_rate": 6.874446713448533e-05, "loss": 0.8415, "step": 174730 }, { "epoch": 3.0678207131445427, "grad_norm": 0.0571028198515609, "learning_rate": 6.873393176884728e-05, "loss": 0.8459, "step": 174740 }, { "epoch": 3.0679962780245438, "grad_norm": 0.05604837147419114, "learning_rate": 6.872339681202647e-05, "loss": 0.8366, "step": 174750 }, { "epoch": 3.0681718429045453, "grad_norm": 0.06581767315761806, "learning_rate": 6.871286226415644e-05, "loss": 0.8422, "step": 174760 }, { "epoch": 3.0683474077845467, "grad_norm": 0.07112452090976763, "learning_rate": 6.870232812537062e-05, "loss": 0.84, "step": 174770 }, { "epoch": 3.068522972664548, "grad_norm": 0.05459984864656078, "learning_rate": 6.869179439580247e-05, "loss": 0.8373, "step": 174780 }, { "epoch": 3.0686985375445497, "grad_norm": 0.05458543856797954, "learning_rate": 6.868126107558551e-05, "loss": 0.84, "step": 174790 }, { "epoch": 3.068874102424551, "grad_norm": 0.0507677971273891, "learning_rate": 6.867072816485315e-05, "loss": 0.8364, "step": 174800 }, { "epoch": 3.069049667304552, "grad_norm": 0.07528736848452743, "learning_rate": 6.866019566373888e-05, "loss": 0.8418, "step": 174810 }, { "epoch": 3.0692252321845537, "grad_norm": 0.05480526500026667, "learning_rate": 6.86496635723761e-05, "loss": 0.8482, "step": 174820 }, { "epoch": 3.069400797064555, "grad_norm": 0.07761961636197628, "learning_rate": 6.863913189089831e-05, "loss": 0.8401, "step": 174830 }, { "epoch": 3.0695763619445566, "grad_norm": 0.06851076582836027, "learning_rate": 6.862860061943892e-05, "loss": 0.8434, "step": 174840 }, { "epoch": 3.069751926824558, "grad_norm": 0.04836704169031059, "learning_rate": 6.861806975813132e-05, "loss": 0.8453, "step": 174850 }, { "epoch": 3.0699274917045596, "grad_norm": 0.04928533420809973, "learning_rate": 6.860753930710902e-05, "loss": 0.8396, "step": 174860 }, { "epoch": 3.0701030565845606, "grad_norm": 0.04876020981083669, "learning_rate": 6.859700926650542e-05, "loss": 0.8458, "step": 174870 }, { "epoch": 3.070278621464562, "grad_norm": 0.05198083813925165, "learning_rate": 6.858647963645388e-05, "loss": 0.8397, "step": 174880 }, { "epoch": 3.0704541863445636, "grad_norm": 0.05120276262284467, "learning_rate": 6.857595041708789e-05, "loss": 0.844, "step": 174890 }, { "epoch": 3.070629751224565, "grad_norm": 0.06470388183285733, "learning_rate": 6.856542160854081e-05, "loss": 0.8397, "step": 174900 }, { "epoch": 3.0708053161045665, "grad_norm": 0.049359980020908516, "learning_rate": 6.855489321094607e-05, "loss": 0.8451, "step": 174910 }, { "epoch": 3.070980880984568, "grad_norm": 0.0635577167935494, "learning_rate": 6.854436522443701e-05, "loss": 0.8358, "step": 174920 }, { "epoch": 3.071156445864569, "grad_norm": 0.07775946223093315, "learning_rate": 6.853383764914711e-05, "loss": 0.8447, "step": 174930 }, { "epoch": 3.0713320107445705, "grad_norm": 0.047803275137090706, "learning_rate": 6.852331048520971e-05, "loss": 0.8501, "step": 174940 }, { "epoch": 3.071507575624572, "grad_norm": 0.060496403907323276, "learning_rate": 6.851278373275812e-05, "loss": 0.8551, "step": 174950 }, { "epoch": 3.0716831405045735, "grad_norm": 0.05776512595723172, "learning_rate": 6.850225739192584e-05, "loss": 0.8429, "step": 174960 }, { "epoch": 3.071858705384575, "grad_norm": 0.04623281312838503, "learning_rate": 6.849173146284619e-05, "loss": 0.8448, "step": 174970 }, { "epoch": 3.0720342702645764, "grad_norm": 0.0515425328795, "learning_rate": 6.848120594565252e-05, "loss": 0.8382, "step": 174980 }, { "epoch": 3.0722098351445775, "grad_norm": 0.057203149662560474, "learning_rate": 6.847068084047819e-05, "loss": 0.8429, "step": 174990 }, { "epoch": 3.072385400024579, "grad_norm": 0.04053235461373034, "learning_rate": 6.84601561474566e-05, "loss": 0.8411, "step": 175000 }, { "epoch": 3.0725609649045804, "grad_norm": 0.04686865700704318, "learning_rate": 6.844963186672105e-05, "loss": 0.8485, "step": 175010 }, { "epoch": 3.072736529784582, "grad_norm": 0.06295647949797388, "learning_rate": 6.843910799840488e-05, "loss": 0.8396, "step": 175020 }, { "epoch": 3.0729120946645834, "grad_norm": 0.05486772996822058, "learning_rate": 6.842858454264148e-05, "loss": 0.8437, "step": 175030 }, { "epoch": 3.073087659544585, "grad_norm": 0.056094204974172605, "learning_rate": 6.841806149956417e-05, "loss": 0.8404, "step": 175040 }, { "epoch": 3.0732632244245863, "grad_norm": 0.06139133163686299, "learning_rate": 6.840753886930624e-05, "loss": 0.8446, "step": 175050 }, { "epoch": 3.0734387893045874, "grad_norm": 0.050802315207763675, "learning_rate": 6.839701665200104e-05, "loss": 0.843, "step": 175060 }, { "epoch": 3.073614354184589, "grad_norm": 0.04625580713468218, "learning_rate": 6.838649484778191e-05, "loss": 0.8395, "step": 175070 }, { "epoch": 3.0737899190645903, "grad_norm": 0.059882726888172894, "learning_rate": 6.837597345678214e-05, "loss": 0.8414, "step": 175080 }, { "epoch": 3.073965483944592, "grad_norm": 0.06155571828271598, "learning_rate": 6.836545247913501e-05, "loss": 0.8513, "step": 175090 }, { "epoch": 3.0741410488245933, "grad_norm": 0.08269285448885104, "learning_rate": 6.835493191497386e-05, "loss": 0.8458, "step": 175100 }, { "epoch": 3.0743166137045947, "grad_norm": 0.04657123693172788, "learning_rate": 6.834441176443201e-05, "loss": 0.8378, "step": 175110 }, { "epoch": 3.074492178584596, "grad_norm": 0.0480059044932817, "learning_rate": 6.833389202764265e-05, "loss": 0.8378, "step": 175120 }, { "epoch": 3.0746677434645973, "grad_norm": 0.0643675751427009, "learning_rate": 6.83233727047392e-05, "loss": 0.8447, "step": 175130 }, { "epoch": 3.0748433083445987, "grad_norm": 0.0694262062027441, "learning_rate": 6.831285379585486e-05, "loss": 0.8355, "step": 175140 }, { "epoch": 3.0750188732246, "grad_norm": 0.07284552273729417, "learning_rate": 6.830233530112294e-05, "loss": 0.838, "step": 175150 }, { "epoch": 3.0751944381046017, "grad_norm": 0.05345974779365334, "learning_rate": 6.829181722067668e-05, "loss": 0.8409, "step": 175160 }, { "epoch": 3.075370002984603, "grad_norm": 0.0511138065082372, "learning_rate": 6.828129955464937e-05, "loss": 0.8457, "step": 175170 }, { "epoch": 3.075545567864604, "grad_norm": 0.04477027606524673, "learning_rate": 6.827078230317429e-05, "loss": 0.8459, "step": 175180 }, { "epoch": 3.0757211327446057, "grad_norm": 0.05386443513596692, "learning_rate": 6.82602654663846e-05, "loss": 0.8432, "step": 175190 }, { "epoch": 3.075896697624607, "grad_norm": 0.06114659397655289, "learning_rate": 6.82497490444137e-05, "loss": 0.8397, "step": 175200 }, { "epoch": 3.0760722625046086, "grad_norm": 0.050591329980027265, "learning_rate": 6.823923303739471e-05, "loss": 0.8481, "step": 175210 }, { "epoch": 3.07624782738461, "grad_norm": 0.03604864912326447, "learning_rate": 6.822871744546093e-05, "loss": 0.8475, "step": 175220 }, { "epoch": 3.0764233922646116, "grad_norm": 0.043813599783656364, "learning_rate": 6.821820226874559e-05, "loss": 0.8438, "step": 175230 }, { "epoch": 3.0765989571446126, "grad_norm": 0.046396225454713835, "learning_rate": 6.82076875073819e-05, "loss": 0.8415, "step": 175240 }, { "epoch": 3.076774522024614, "grad_norm": 0.056352733081177483, "learning_rate": 6.819717316150312e-05, "loss": 0.8489, "step": 175250 }, { "epoch": 3.0769500869046156, "grad_norm": 0.07139400274647001, "learning_rate": 6.81866592312424e-05, "loss": 0.8439, "step": 175260 }, { "epoch": 3.077125651784617, "grad_norm": 0.060044101621152965, "learning_rate": 6.817614571673302e-05, "loss": 0.84, "step": 175270 }, { "epoch": 3.0773012166646185, "grad_norm": 0.04775424328424617, "learning_rate": 6.816563261810816e-05, "loss": 0.8346, "step": 175280 }, { "epoch": 3.07747678154462, "grad_norm": 0.05733613363852002, "learning_rate": 6.8155119935501e-05, "loss": 0.8458, "step": 175290 }, { "epoch": 3.077652346424621, "grad_norm": 0.0515582963293572, "learning_rate": 6.81446076690448e-05, "loss": 0.8299, "step": 175300 }, { "epoch": 3.0778279113046225, "grad_norm": 0.05405448904137167, "learning_rate": 6.813409581887273e-05, "loss": 0.8391, "step": 175310 }, { "epoch": 3.078003476184624, "grad_norm": 0.06706674988943122, "learning_rate": 6.812358438511794e-05, "loss": 0.8336, "step": 175320 }, { "epoch": 3.0781790410646255, "grad_norm": 0.054050909432187884, "learning_rate": 6.811307336791364e-05, "loss": 0.8462, "step": 175330 }, { "epoch": 3.078354605944627, "grad_norm": 0.05787285056921521, "learning_rate": 6.8102562767393e-05, "loss": 0.8367, "step": 175340 }, { "epoch": 3.0785301708246284, "grad_norm": 0.054090252481081125, "learning_rate": 6.809205258368922e-05, "loss": 0.8438, "step": 175350 }, { "epoch": 3.0787057357046295, "grad_norm": 0.06158706204185987, "learning_rate": 6.808154281693539e-05, "loss": 0.8429, "step": 175360 }, { "epoch": 3.078881300584631, "grad_norm": 0.06846877903509613, "learning_rate": 6.807103346726476e-05, "loss": 0.8419, "step": 175370 }, { "epoch": 3.0790568654646324, "grad_norm": 0.05421220977384527, "learning_rate": 6.806052453481044e-05, "loss": 0.8482, "step": 175380 }, { "epoch": 3.079232430344634, "grad_norm": 0.05748259945135824, "learning_rate": 6.805001601970557e-05, "loss": 0.8425, "step": 175390 }, { "epoch": 3.0794079952246354, "grad_norm": 0.05012459230974726, "learning_rate": 6.803950792208333e-05, "loss": 0.8482, "step": 175400 }, { "epoch": 3.079583560104637, "grad_norm": 0.05279473617199634, "learning_rate": 6.802900024207686e-05, "loss": 0.8405, "step": 175410 }, { "epoch": 3.079759124984638, "grad_norm": 0.05834227635235926, "learning_rate": 6.801849297981925e-05, "loss": 0.8338, "step": 175420 }, { "epoch": 3.0799346898646394, "grad_norm": 0.06710985352832502, "learning_rate": 6.800798613544365e-05, "loss": 0.8397, "step": 175430 }, { "epoch": 3.080110254744641, "grad_norm": 0.07236476195349621, "learning_rate": 6.79974797090832e-05, "loss": 0.8382, "step": 175440 }, { "epoch": 3.0802858196246423, "grad_norm": 0.04201045618977495, "learning_rate": 6.798697370087101e-05, "loss": 0.8394, "step": 175450 }, { "epoch": 3.080461384504644, "grad_norm": 0.060240009565047326, "learning_rate": 6.797646811094015e-05, "loss": 0.8422, "step": 175460 }, { "epoch": 3.0806369493846453, "grad_norm": 0.07941543292607534, "learning_rate": 6.796596293942381e-05, "loss": 0.8387, "step": 175470 }, { "epoch": 3.0808125142646463, "grad_norm": 0.05312550477204751, "learning_rate": 6.795545818645505e-05, "loss": 0.846, "step": 175480 }, { "epoch": 3.080988079144648, "grad_norm": 0.04254467151011606, "learning_rate": 6.794495385216697e-05, "loss": 0.8384, "step": 175490 }, { "epoch": 3.0811636440246493, "grad_norm": 0.060770883838173036, "learning_rate": 6.793444993669263e-05, "loss": 0.8476, "step": 175500 }, { "epoch": 3.0813392089046507, "grad_norm": 0.044073989113208034, "learning_rate": 6.792394644016518e-05, "loss": 0.8439, "step": 175510 }, { "epoch": 3.081514773784652, "grad_norm": 0.04559024828243668, "learning_rate": 6.791344336271767e-05, "loss": 0.8431, "step": 175520 }, { "epoch": 3.0816903386646537, "grad_norm": 0.041661859889022344, "learning_rate": 6.790294070448312e-05, "loss": 0.8383, "step": 175530 }, { "epoch": 3.0818659035446547, "grad_norm": 0.04782584135692878, "learning_rate": 6.78924384655947e-05, "loss": 0.8319, "step": 175540 }, { "epoch": 3.082041468424656, "grad_norm": 0.05157099116040579, "learning_rate": 6.788193664618543e-05, "loss": 0.8402, "step": 175550 }, { "epoch": 3.0822170333046577, "grad_norm": 0.05156657037128815, "learning_rate": 6.787143524638834e-05, "loss": 0.8466, "step": 175560 }, { "epoch": 3.082392598184659, "grad_norm": 0.09123495315940242, "learning_rate": 6.786093426633655e-05, "loss": 0.8413, "step": 175570 }, { "epoch": 3.0825681630646606, "grad_norm": 0.06611216396004564, "learning_rate": 6.785043370616306e-05, "loss": 0.8393, "step": 175580 }, { "epoch": 3.082743727944662, "grad_norm": 0.045253924977061835, "learning_rate": 6.783993356600095e-05, "loss": 0.8354, "step": 175590 }, { "epoch": 3.082919292824663, "grad_norm": 0.04904783246215093, "learning_rate": 6.78294338459832e-05, "loss": 0.8432, "step": 175600 }, { "epoch": 3.0830948577046646, "grad_norm": 0.04682271394293054, "learning_rate": 6.781893454624289e-05, "loss": 0.8522, "step": 175610 }, { "epoch": 3.083270422584666, "grad_norm": 0.05571550586570587, "learning_rate": 6.780843566691305e-05, "loss": 0.8385, "step": 175620 }, { "epoch": 3.0834459874646676, "grad_norm": 0.06299299756136877, "learning_rate": 6.779793720812666e-05, "loss": 0.838, "step": 175630 }, { "epoch": 3.083621552344669, "grad_norm": 0.0539389625226837, "learning_rate": 6.778743917001679e-05, "loss": 0.8381, "step": 175640 }, { "epoch": 3.0837971172246705, "grad_norm": 0.04583732730772104, "learning_rate": 6.777694155271645e-05, "loss": 0.8364, "step": 175650 }, { "epoch": 3.0839726821046716, "grad_norm": 0.050943267480609795, "learning_rate": 6.776644435635862e-05, "loss": 0.8441, "step": 175660 }, { "epoch": 3.084148246984673, "grad_norm": 0.05646451104510366, "learning_rate": 6.77559475810763e-05, "loss": 0.8456, "step": 175670 }, { "epoch": 3.0843238118646745, "grad_norm": 0.07566261431552623, "learning_rate": 6.774545122700248e-05, "loss": 0.8493, "step": 175680 }, { "epoch": 3.084499376744676, "grad_norm": 0.08124948062868495, "learning_rate": 6.77349552942702e-05, "loss": 0.8429, "step": 175690 }, { "epoch": 3.0846749416246775, "grad_norm": 0.043292922569825705, "learning_rate": 6.772445978301237e-05, "loss": 0.8422, "step": 175700 }, { "epoch": 3.084850506504679, "grad_norm": 0.050549397081318684, "learning_rate": 6.771396469336204e-05, "loss": 0.851, "step": 175710 }, { "epoch": 3.0850260713846804, "grad_norm": 0.06559995355768417, "learning_rate": 6.770347002545215e-05, "loss": 0.8366, "step": 175720 }, { "epoch": 3.0852016362646815, "grad_norm": 0.04670164810150193, "learning_rate": 6.769297577941567e-05, "loss": 0.8434, "step": 175730 }, { "epoch": 3.085377201144683, "grad_norm": 0.049239239332879776, "learning_rate": 6.768248195538559e-05, "loss": 0.847, "step": 175740 }, { "epoch": 3.0855527660246844, "grad_norm": 0.04919922779698342, "learning_rate": 6.767198855349484e-05, "loss": 0.8427, "step": 175750 }, { "epoch": 3.085728330904686, "grad_norm": 0.04833090629942577, "learning_rate": 6.766149557387638e-05, "loss": 0.8364, "step": 175760 }, { "epoch": 3.0859038957846874, "grad_norm": 0.08210172953866542, "learning_rate": 6.765100301666317e-05, "loss": 0.8348, "step": 175770 }, { "epoch": 3.0860794606646884, "grad_norm": 0.04054686309990024, "learning_rate": 6.764051088198813e-05, "loss": 0.841, "step": 175780 }, { "epoch": 3.08625502554469, "grad_norm": 0.05298319722869329, "learning_rate": 6.763001916998422e-05, "loss": 0.8444, "step": 175790 }, { "epoch": 3.0864305904246914, "grad_norm": 0.05618117653237754, "learning_rate": 6.761952788078432e-05, "loss": 0.8374, "step": 175800 }, { "epoch": 3.086606155304693, "grad_norm": 0.04603667412880749, "learning_rate": 6.760903701452144e-05, "loss": 0.8432, "step": 175810 }, { "epoch": 3.0867817201846943, "grad_norm": 0.06707158077092362, "learning_rate": 6.759854657132847e-05, "loss": 0.8457, "step": 175820 }, { "epoch": 3.086957285064696, "grad_norm": 0.056767607665002016, "learning_rate": 6.758805655133831e-05, "loss": 0.8384, "step": 175830 }, { "epoch": 3.0871328499446973, "grad_norm": 0.048808244670754085, "learning_rate": 6.757756695468386e-05, "loss": 0.8335, "step": 175840 }, { "epoch": 3.0873084148246983, "grad_norm": 0.05289745362125682, "learning_rate": 6.756707778149805e-05, "loss": 0.8407, "step": 175850 }, { "epoch": 3.0874839797047, "grad_norm": 0.05078149034254458, "learning_rate": 6.755658903191378e-05, "loss": 0.8449, "step": 175860 }, { "epoch": 3.0876595445847013, "grad_norm": 0.06720827218637557, "learning_rate": 6.754610070606391e-05, "loss": 0.8474, "step": 175870 }, { "epoch": 3.0878351094647027, "grad_norm": 0.047477115099590815, "learning_rate": 6.753561280408137e-05, "loss": 0.8475, "step": 175880 }, { "epoch": 3.088010674344704, "grad_norm": 0.08208660857161702, "learning_rate": 6.752512532609904e-05, "loss": 0.8408, "step": 175890 }, { "epoch": 3.0881862392247057, "grad_norm": 0.051133971096909626, "learning_rate": 6.751463827224977e-05, "loss": 0.8439, "step": 175900 }, { "epoch": 3.0883618041047067, "grad_norm": 0.046556948912207075, "learning_rate": 6.750415164266646e-05, "loss": 0.8461, "step": 175910 }, { "epoch": 3.088537368984708, "grad_norm": 0.045587662016547306, "learning_rate": 6.749366543748199e-05, "loss": 0.837, "step": 175920 }, { "epoch": 3.0887129338647097, "grad_norm": 0.08083303810312431, "learning_rate": 6.748317965682919e-05, "loss": 0.8473, "step": 175930 }, { "epoch": 3.088888498744711, "grad_norm": 0.06606754874293368, "learning_rate": 6.747269430084091e-05, "loss": 0.8404, "step": 175940 }, { "epoch": 3.0890640636247126, "grad_norm": 0.06173030206203973, "learning_rate": 6.746220936965003e-05, "loss": 0.8406, "step": 175950 }, { "epoch": 3.089239628504714, "grad_norm": 0.05005264784060539, "learning_rate": 6.745172486338939e-05, "loss": 0.8388, "step": 175960 }, { "epoch": 3.089415193384715, "grad_norm": 0.0703764318352421, "learning_rate": 6.744124078219177e-05, "loss": 0.8373, "step": 175970 }, { "epoch": 3.0895907582647166, "grad_norm": 0.05311478626961473, "learning_rate": 6.743075712619011e-05, "loss": 0.8466, "step": 175980 }, { "epoch": 3.089766323144718, "grad_norm": 0.05151508757425209, "learning_rate": 6.74202738955172e-05, "loss": 0.835, "step": 175990 }, { "epoch": 3.0899418880247196, "grad_norm": 0.06506151750285391, "learning_rate": 6.740979109030586e-05, "loss": 0.8465, "step": 176000 }, { "epoch": 3.090117452904721, "grad_norm": 0.05516643759727267, "learning_rate": 6.739930871068889e-05, "loss": 0.8513, "step": 176010 }, { "epoch": 3.0902930177847225, "grad_norm": 0.06764897001196098, "learning_rate": 6.73888267567991e-05, "loss": 0.8541, "step": 176020 }, { "epoch": 3.0904685826647236, "grad_norm": 0.03943588138619527, "learning_rate": 6.737834522876936e-05, "loss": 0.8425, "step": 176030 }, { "epoch": 3.090644147544725, "grad_norm": 0.05904913230800903, "learning_rate": 6.736786412673235e-05, "loss": 0.841, "step": 176040 }, { "epoch": 3.0908197124247265, "grad_norm": 0.04623364912743804, "learning_rate": 6.735738345082101e-05, "loss": 0.8322, "step": 176050 }, { "epoch": 3.090995277304728, "grad_norm": 0.06328085380103222, "learning_rate": 6.734690320116807e-05, "loss": 0.8498, "step": 176060 }, { "epoch": 3.0911708421847295, "grad_norm": 0.06063478765068309, "learning_rate": 6.73364233779063e-05, "loss": 0.8342, "step": 176070 }, { "epoch": 3.091346407064731, "grad_norm": 0.057848255670455755, "learning_rate": 6.73259439811685e-05, "loss": 0.8367, "step": 176080 }, { "epoch": 3.091521971944732, "grad_norm": 0.052824182087618576, "learning_rate": 6.731546501108746e-05, "loss": 0.8435, "step": 176090 }, { "epoch": 3.0916975368247335, "grad_norm": 0.06899720080919773, "learning_rate": 6.730498646779592e-05, "loss": 0.8365, "step": 176100 }, { "epoch": 3.091873101704735, "grad_norm": 0.07054744576774609, "learning_rate": 6.729450835142665e-05, "loss": 0.8462, "step": 176110 }, { "epoch": 3.0920486665847364, "grad_norm": 0.05298313185937617, "learning_rate": 6.728403066211243e-05, "loss": 0.8442, "step": 176120 }, { "epoch": 3.092224231464738, "grad_norm": 0.04631277822133381, "learning_rate": 6.7273553399986e-05, "loss": 0.8449, "step": 176130 }, { "epoch": 3.0923997963447394, "grad_norm": 0.051131948431171276, "learning_rate": 6.72630765651801e-05, "loss": 0.8552, "step": 176140 }, { "epoch": 3.0925753612247404, "grad_norm": 0.05287713288912453, "learning_rate": 6.72526001578275e-05, "loss": 0.841, "step": 176150 }, { "epoch": 3.092750926104742, "grad_norm": 0.05084416466670568, "learning_rate": 6.724212417806093e-05, "loss": 0.8452, "step": 176160 }, { "epoch": 3.0929264909847434, "grad_norm": 0.058122725910300994, "learning_rate": 6.723164862601311e-05, "loss": 0.8392, "step": 176170 }, { "epoch": 3.093102055864745, "grad_norm": 0.05989885352761236, "learning_rate": 6.722117350181675e-05, "loss": 0.8459, "step": 176180 }, { "epoch": 3.0932776207447463, "grad_norm": 0.05373275796701177, "learning_rate": 6.721069880560462e-05, "loss": 0.8519, "step": 176190 }, { "epoch": 3.093453185624748, "grad_norm": 0.05451270370316998, "learning_rate": 6.720022453750942e-05, "loss": 0.8441, "step": 176200 }, { "epoch": 3.093628750504749, "grad_norm": 0.0451687006867277, "learning_rate": 6.718975069766382e-05, "loss": 0.8435, "step": 176210 }, { "epoch": 3.0938043153847503, "grad_norm": 0.05360337902536341, "learning_rate": 6.717927728620057e-05, "loss": 0.8389, "step": 176220 }, { "epoch": 3.093979880264752, "grad_norm": 0.05076147497846708, "learning_rate": 6.716880430325238e-05, "loss": 0.8386, "step": 176230 }, { "epoch": 3.0941554451447533, "grad_norm": 0.06218577502833038, "learning_rate": 6.71583317489519e-05, "loss": 0.8442, "step": 176240 }, { "epoch": 3.0943310100247547, "grad_norm": 0.05588292932014368, "learning_rate": 6.714785962343186e-05, "loss": 0.8363, "step": 176250 }, { "epoch": 3.0945065749047562, "grad_norm": 0.05465870914154287, "learning_rate": 6.713738792682493e-05, "loss": 0.8413, "step": 176260 }, { "epoch": 3.0946821397847573, "grad_norm": 0.11220465912370961, "learning_rate": 6.712691665926379e-05, "loss": 0.8324, "step": 176270 }, { "epoch": 3.0948577046647587, "grad_norm": 0.05179901890804501, "learning_rate": 6.711644582088108e-05, "loss": 0.8376, "step": 176280 }, { "epoch": 3.09503326954476, "grad_norm": 0.04638387583813963, "learning_rate": 6.710597541180951e-05, "loss": 0.8363, "step": 176290 }, { "epoch": 3.0952088344247617, "grad_norm": 0.0672091943619568, "learning_rate": 6.709550543218174e-05, "loss": 0.8333, "step": 176300 }, { "epoch": 3.095384399304763, "grad_norm": 0.050509011827878175, "learning_rate": 6.708503588213037e-05, "loss": 0.8483, "step": 176310 }, { "epoch": 3.0955599641847646, "grad_norm": 0.055071959741475975, "learning_rate": 6.707456676178811e-05, "loss": 0.8439, "step": 176320 }, { "epoch": 3.0957355290647657, "grad_norm": 0.04676578760943654, "learning_rate": 6.706409807128763e-05, "loss": 0.846, "step": 176330 }, { "epoch": 3.095911093944767, "grad_norm": 0.057354728297267024, "learning_rate": 6.705362981076151e-05, "loss": 0.8386, "step": 176340 }, { "epoch": 3.0960866588247686, "grad_norm": 0.045402510007482424, "learning_rate": 6.704316198034239e-05, "loss": 0.8342, "step": 176350 }, { "epoch": 3.09626222370477, "grad_norm": 0.05535217172348485, "learning_rate": 6.703269458016293e-05, "loss": 0.8432, "step": 176360 }, { "epoch": 3.0964377885847716, "grad_norm": 0.07145952704896984, "learning_rate": 6.702222761035574e-05, "loss": 0.8448, "step": 176370 }, { "epoch": 3.096613353464773, "grad_norm": 0.05350954592173466, "learning_rate": 6.70117610710534e-05, "loss": 0.8435, "step": 176380 }, { "epoch": 3.096788918344774, "grad_norm": 0.051361589057380065, "learning_rate": 6.700129496238861e-05, "loss": 0.8442, "step": 176390 }, { "epoch": 3.0969644832247756, "grad_norm": 0.050063715457771094, "learning_rate": 6.699082928449392e-05, "loss": 0.8459, "step": 176400 }, { "epoch": 3.097140048104777, "grad_norm": 0.04516873235987365, "learning_rate": 6.698036403750192e-05, "loss": 0.8499, "step": 176410 }, { "epoch": 3.0973156129847785, "grad_norm": 0.05906207479194119, "learning_rate": 6.696989922154526e-05, "loss": 0.8412, "step": 176420 }, { "epoch": 3.09749117786478, "grad_norm": 0.04819318209330234, "learning_rate": 6.69594348367565e-05, "loss": 0.848, "step": 176430 }, { "epoch": 3.0976667427447815, "grad_norm": 0.05126633724485611, "learning_rate": 6.69489708832682e-05, "loss": 0.8397, "step": 176440 }, { "epoch": 3.0978423076247825, "grad_norm": 0.06078854695194317, "learning_rate": 6.693850736121297e-05, "loss": 0.8392, "step": 176450 }, { "epoch": 3.098017872504784, "grad_norm": 0.04663654232338929, "learning_rate": 6.692804427072339e-05, "loss": 0.8397, "step": 176460 }, { "epoch": 3.0981934373847855, "grad_norm": 0.05707538363705613, "learning_rate": 6.6917581611932e-05, "loss": 0.839, "step": 176470 }, { "epoch": 3.098369002264787, "grad_norm": 0.05105369212394215, "learning_rate": 6.690711938497137e-05, "loss": 0.8433, "step": 176480 }, { "epoch": 3.0985445671447884, "grad_norm": 0.08019685722200995, "learning_rate": 6.68966575899741e-05, "loss": 0.8313, "step": 176490 }, { "epoch": 3.09872013202479, "grad_norm": 0.04805858596052041, "learning_rate": 6.688619622707271e-05, "loss": 0.8376, "step": 176500 }, { "epoch": 3.0988956969047914, "grad_norm": 0.05790563856657866, "learning_rate": 6.687573529639976e-05, "loss": 0.8365, "step": 176510 }, { "epoch": 3.0990712617847924, "grad_norm": 0.04436462794553314, "learning_rate": 6.686527479808777e-05, "loss": 0.8422, "step": 176520 }, { "epoch": 3.099246826664794, "grad_norm": 0.06487651378632081, "learning_rate": 6.68548147322693e-05, "loss": 0.8454, "step": 176530 }, { "epoch": 3.0994223915447954, "grad_norm": 0.065770713312634, "learning_rate": 6.684435509907686e-05, "loss": 0.8372, "step": 176540 }, { "epoch": 3.099597956424797, "grad_norm": 0.058645581726262254, "learning_rate": 6.683389589864297e-05, "loss": 0.8366, "step": 176550 }, { "epoch": 3.0997735213047983, "grad_norm": 0.06740267347432748, "learning_rate": 6.682343713110019e-05, "loss": 0.837, "step": 176560 }, { "epoch": 3.0999490861848, "grad_norm": 0.050481602196107256, "learning_rate": 6.681297879658101e-05, "loss": 0.8343, "step": 176570 }, { "epoch": 3.100124651064801, "grad_norm": 0.04735066133663418, "learning_rate": 6.680252089521794e-05, "loss": 0.8389, "step": 176580 }, { "epoch": 3.1003002159448023, "grad_norm": 0.056510662029249474, "learning_rate": 6.67920634271435e-05, "loss": 0.8442, "step": 176590 }, { "epoch": 3.100475780824804, "grad_norm": 0.06140064798466904, "learning_rate": 6.678160639249016e-05, "loss": 0.8424, "step": 176600 }, { "epoch": 3.1006513457048053, "grad_norm": 0.05302413778783998, "learning_rate": 6.677114979139042e-05, "loss": 0.8359, "step": 176610 }, { "epoch": 3.1008269105848068, "grad_norm": 0.05032403273116645, "learning_rate": 6.676069362397675e-05, "loss": 0.8482, "step": 176620 }, { "epoch": 3.1010024754648082, "grad_norm": 0.07322200615015086, "learning_rate": 6.675023789038169e-05, "loss": 0.841, "step": 176630 }, { "epoch": 3.1011780403448093, "grad_norm": 0.04620816425499337, "learning_rate": 6.673978259073768e-05, "loss": 0.8459, "step": 176640 }, { "epoch": 3.1013536052248107, "grad_norm": 0.04132884488616638, "learning_rate": 6.672932772517715e-05, "loss": 0.8461, "step": 176650 }, { "epoch": 3.101529170104812, "grad_norm": 0.0506498732779882, "learning_rate": 6.671887329383264e-05, "loss": 0.8419, "step": 176660 }, { "epoch": 3.1017047349848137, "grad_norm": 0.05770249725276673, "learning_rate": 6.670841929683658e-05, "loss": 0.8461, "step": 176670 }, { "epoch": 3.101880299864815, "grad_norm": 0.05317057757347464, "learning_rate": 6.669796573432143e-05, "loss": 0.8408, "step": 176680 }, { "epoch": 3.1020558647448166, "grad_norm": 0.05924226787374698, "learning_rate": 6.668751260641959e-05, "loss": 0.8507, "step": 176690 }, { "epoch": 3.1022314296248177, "grad_norm": 0.05570965606153921, "learning_rate": 6.667705991326354e-05, "loss": 0.836, "step": 176700 }, { "epoch": 3.102406994504819, "grad_norm": 0.058139195393872245, "learning_rate": 6.666660765498575e-05, "loss": 0.843, "step": 176710 }, { "epoch": 3.1025825593848206, "grad_norm": 0.04211043172841682, "learning_rate": 6.665615583171858e-05, "loss": 0.85, "step": 176720 }, { "epoch": 3.102758124264822, "grad_norm": 0.04963040509697402, "learning_rate": 6.664570444359453e-05, "loss": 0.8432, "step": 176730 }, { "epoch": 3.1029336891448236, "grad_norm": 0.04916544762149151, "learning_rate": 6.6635253490746e-05, "loss": 0.8417, "step": 176740 }, { "epoch": 3.103109254024825, "grad_norm": 0.044549785156672114, "learning_rate": 6.662480297330535e-05, "loss": 0.8446, "step": 176750 }, { "epoch": 3.103284818904826, "grad_norm": 0.05475749730330051, "learning_rate": 6.661435289140509e-05, "loss": 0.8407, "step": 176760 }, { "epoch": 3.1034603837848276, "grad_norm": 0.07226929245841855, "learning_rate": 6.660390324517754e-05, "loss": 0.8399, "step": 176770 }, { "epoch": 3.103635948664829, "grad_norm": 0.04361924738475813, "learning_rate": 6.659345403475514e-05, "loss": 0.8351, "step": 176780 }, { "epoch": 3.1038115135448305, "grad_norm": 0.05299461124014582, "learning_rate": 6.658300526027024e-05, "loss": 0.8432, "step": 176790 }, { "epoch": 3.103987078424832, "grad_norm": 0.06824013958938431, "learning_rate": 6.657255692185528e-05, "loss": 0.8437, "step": 176800 }, { "epoch": 3.1041626433048335, "grad_norm": 0.0775442856086218, "learning_rate": 6.656210901964264e-05, "loss": 0.8394, "step": 176810 }, { "epoch": 3.1043382081848345, "grad_norm": 0.05931238196283119, "learning_rate": 6.655166155376463e-05, "loss": 0.8406, "step": 176820 }, { "epoch": 3.104513773064836, "grad_norm": 0.06084799071151443, "learning_rate": 6.65412145243537e-05, "loss": 0.8491, "step": 176830 }, { "epoch": 3.1046893379448375, "grad_norm": 0.054876693755206984, "learning_rate": 6.653076793154221e-05, "loss": 0.8454, "step": 176840 }, { "epoch": 3.104864902824839, "grad_norm": 0.04786554464039425, "learning_rate": 6.652032177546248e-05, "loss": 0.8393, "step": 176850 }, { "epoch": 3.1050404677048404, "grad_norm": 0.05276662942586663, "learning_rate": 6.650987605624686e-05, "loss": 0.8389, "step": 176860 }, { "epoch": 3.105216032584842, "grad_norm": 0.059039383775644036, "learning_rate": 6.649943077402774e-05, "loss": 0.845, "step": 176870 }, { "epoch": 3.105391597464843, "grad_norm": 0.07418162372790557, "learning_rate": 6.648898592893744e-05, "loss": 0.8371, "step": 176880 }, { "epoch": 3.1055671623448444, "grad_norm": 0.0437778118255116, "learning_rate": 6.647854152110828e-05, "loss": 0.85, "step": 176890 }, { "epoch": 3.105742727224846, "grad_norm": 0.06086256586511177, "learning_rate": 6.646809755067263e-05, "loss": 0.8405, "step": 176900 }, { "epoch": 3.1059182921048474, "grad_norm": 0.04707679576738242, "learning_rate": 6.645765401776282e-05, "loss": 0.8406, "step": 176910 }, { "epoch": 3.106093856984849, "grad_norm": 0.05203652217599716, "learning_rate": 6.644721092251113e-05, "loss": 0.8517, "step": 176920 }, { "epoch": 3.1062694218648503, "grad_norm": 0.05126471686367877, "learning_rate": 6.643676826504992e-05, "loss": 0.8373, "step": 176930 }, { "epoch": 3.1064449867448514, "grad_norm": 0.04537980276521672, "learning_rate": 6.642632604551148e-05, "loss": 0.8469, "step": 176940 }, { "epoch": 3.106620551624853, "grad_norm": 0.0595798849982844, "learning_rate": 6.641588426402811e-05, "loss": 0.8386, "step": 176950 }, { "epoch": 3.1067961165048543, "grad_norm": 0.058961097617391996, "learning_rate": 6.64054429207321e-05, "loss": 0.8353, "step": 176960 }, { "epoch": 3.106971681384856, "grad_norm": 0.055638564737910505, "learning_rate": 6.639500201575577e-05, "loss": 0.8373, "step": 176970 }, { "epoch": 3.1071472462648573, "grad_norm": 0.06332925779999139, "learning_rate": 6.638456154923141e-05, "loss": 0.8421, "step": 176980 }, { "epoch": 3.1073228111448588, "grad_norm": 0.04362178442791679, "learning_rate": 6.637412152129122e-05, "loss": 0.8383, "step": 176990 }, { "epoch": 3.10749837602486, "grad_norm": 0.06603978402243751, "learning_rate": 6.636368193206762e-05, "loss": 0.8459, "step": 177000 }, { "epoch": 3.1076739409048613, "grad_norm": 0.06716662486743671, "learning_rate": 6.63532427816928e-05, "loss": 0.8469, "step": 177010 }, { "epoch": 3.1078495057848627, "grad_norm": 0.05361279841961661, "learning_rate": 6.634280407029904e-05, "loss": 0.8391, "step": 177020 }, { "epoch": 3.108025070664864, "grad_norm": 0.04905073582614076, "learning_rate": 6.633236579801856e-05, "loss": 0.8316, "step": 177030 }, { "epoch": 3.1082006355448657, "grad_norm": 0.06290642479730899, "learning_rate": 6.632192796498368e-05, "loss": 0.8474, "step": 177040 }, { "epoch": 3.108376200424867, "grad_norm": 0.08892634083853725, "learning_rate": 6.631149057132661e-05, "loss": 0.8401, "step": 177050 }, { "epoch": 3.108551765304868, "grad_norm": 0.06368479781067783, "learning_rate": 6.630105361717956e-05, "loss": 0.8425, "step": 177060 }, { "epoch": 3.1087273301848697, "grad_norm": 0.052653646722849785, "learning_rate": 6.629061710267485e-05, "loss": 0.8321, "step": 177070 }, { "epoch": 3.108902895064871, "grad_norm": 0.05370815297699797, "learning_rate": 6.628018102794468e-05, "loss": 0.8355, "step": 177080 }, { "epoch": 3.1090784599448726, "grad_norm": 0.04901570217878158, "learning_rate": 6.626974539312123e-05, "loss": 0.8406, "step": 177090 }, { "epoch": 3.109254024824874, "grad_norm": 0.04954390688303785, "learning_rate": 6.625931019833682e-05, "loss": 0.8449, "step": 177100 }, { "epoch": 3.1094295897048756, "grad_norm": 0.05063946737933878, "learning_rate": 6.62488754437236e-05, "loss": 0.8516, "step": 177110 }, { "epoch": 3.1096051545848766, "grad_norm": 0.0631409712315275, "learning_rate": 6.623844112941377e-05, "loss": 0.8468, "step": 177120 }, { "epoch": 3.109780719464878, "grad_norm": 0.050072784774591685, "learning_rate": 6.622800725553954e-05, "loss": 0.8378, "step": 177130 }, { "epoch": 3.1099562843448796, "grad_norm": 0.06671118380752444, "learning_rate": 6.621757382223314e-05, "loss": 0.85, "step": 177140 }, { "epoch": 3.110131849224881, "grad_norm": 0.06683768119319104, "learning_rate": 6.620714082962674e-05, "loss": 0.8429, "step": 177150 }, { "epoch": 3.1103074141048825, "grad_norm": 0.052705652239980465, "learning_rate": 6.619670827785252e-05, "loss": 0.8434, "step": 177160 }, { "epoch": 3.110482978984884, "grad_norm": 0.05944106098368148, "learning_rate": 6.618627616704268e-05, "loss": 0.8391, "step": 177170 }, { "epoch": 3.1106585438648855, "grad_norm": 0.05080230217133314, "learning_rate": 6.617584449732942e-05, "loss": 0.8384, "step": 177180 }, { "epoch": 3.1108341087448865, "grad_norm": 0.05243556930577264, "learning_rate": 6.616541326884487e-05, "loss": 0.8449, "step": 177190 }, { "epoch": 3.111009673624888, "grad_norm": 0.043715578195933726, "learning_rate": 6.61549824817212e-05, "loss": 0.8418, "step": 177200 }, { "epoch": 3.1111852385048895, "grad_norm": 0.045669521006695274, "learning_rate": 6.614455213609059e-05, "loss": 0.8469, "step": 177210 }, { "epoch": 3.111360803384891, "grad_norm": 0.06094747656221403, "learning_rate": 6.61341222320852e-05, "loss": 0.8389, "step": 177220 }, { "epoch": 3.1115363682648924, "grad_norm": 0.057818860013321254, "learning_rate": 6.61236927698371e-05, "loss": 0.8368, "step": 177230 }, { "epoch": 3.1117119331448935, "grad_norm": 0.04582359894583239, "learning_rate": 6.611326374947855e-05, "loss": 0.8323, "step": 177240 }, { "epoch": 3.111887498024895, "grad_norm": 0.0499217360505235, "learning_rate": 6.610283517114165e-05, "loss": 0.8478, "step": 177250 }, { "epoch": 3.1120630629048964, "grad_norm": 0.04413072104359998, "learning_rate": 6.609240703495846e-05, "loss": 0.8396, "step": 177260 }, { "epoch": 3.112238627784898, "grad_norm": 0.0634393646853726, "learning_rate": 6.60819793410612e-05, "loss": 0.8407, "step": 177270 }, { "epoch": 3.1124141926648994, "grad_norm": 0.05924976277905708, "learning_rate": 6.607155208958196e-05, "loss": 0.8389, "step": 177280 }, { "epoch": 3.112589757544901, "grad_norm": 0.07737968740057356, "learning_rate": 6.606112528065285e-05, "loss": 0.8384, "step": 177290 }, { "epoch": 3.1127653224249023, "grad_norm": 0.04845540781509291, "learning_rate": 6.605069891440596e-05, "loss": 0.8437, "step": 177300 }, { "epoch": 3.1129408873049034, "grad_norm": 0.059077194773759285, "learning_rate": 6.604027299097341e-05, "loss": 0.8415, "step": 177310 }, { "epoch": 3.113116452184905, "grad_norm": 0.057515580874801985, "learning_rate": 6.602984751048733e-05, "loss": 0.8523, "step": 177320 }, { "epoch": 3.1132920170649063, "grad_norm": 0.07952376954277225, "learning_rate": 6.601942247307973e-05, "loss": 0.8367, "step": 177330 }, { "epoch": 3.113467581944908, "grad_norm": 0.049294534387541104, "learning_rate": 6.600899787888279e-05, "loss": 0.8403, "step": 177340 }, { "epoch": 3.1136431468249093, "grad_norm": 0.05251196744729007, "learning_rate": 6.599857372802857e-05, "loss": 0.8372, "step": 177350 }, { "epoch": 3.1138187117049108, "grad_norm": 0.0433265171218327, "learning_rate": 6.598815002064911e-05, "loss": 0.8369, "step": 177360 }, { "epoch": 3.113994276584912, "grad_norm": 0.057443280667234066, "learning_rate": 6.597772675687648e-05, "loss": 0.8402, "step": 177370 }, { "epoch": 3.1141698414649133, "grad_norm": 0.0422988025702983, "learning_rate": 6.596730393684279e-05, "loss": 0.8481, "step": 177380 }, { "epoch": 3.1143454063449147, "grad_norm": 0.05560227758889045, "learning_rate": 6.595688156068008e-05, "loss": 0.8461, "step": 177390 }, { "epoch": 3.1145209712249162, "grad_norm": 0.05662321151439553, "learning_rate": 6.594645962852034e-05, "loss": 0.8369, "step": 177400 }, { "epoch": 3.1146965361049177, "grad_norm": 0.04859052372818408, "learning_rate": 6.593603814049573e-05, "loss": 0.8429, "step": 177410 }, { "epoch": 3.114872100984919, "grad_norm": 0.05179253727012936, "learning_rate": 6.592561709673823e-05, "loss": 0.851, "step": 177420 }, { "epoch": 3.11504766586492, "grad_norm": 0.06962921917500237, "learning_rate": 6.591519649737985e-05, "loss": 0.8409, "step": 177430 }, { "epoch": 3.1152232307449217, "grad_norm": 0.05129597705304573, "learning_rate": 6.59047763425527e-05, "loss": 0.8398, "step": 177440 }, { "epoch": 3.115398795624923, "grad_norm": 0.05974106887912427, "learning_rate": 6.589435663238874e-05, "loss": 0.8427, "step": 177450 }, { "epoch": 3.1155743605049246, "grad_norm": 0.05287089539452583, "learning_rate": 6.588393736702002e-05, "loss": 0.8426, "step": 177460 }, { "epoch": 3.115749925384926, "grad_norm": 0.05228434276704972, "learning_rate": 6.587351854657852e-05, "loss": 0.8463, "step": 177470 }, { "epoch": 3.1159254902649276, "grad_norm": 0.042220703108511205, "learning_rate": 6.586310017119629e-05, "loss": 0.8394, "step": 177480 }, { "epoch": 3.1161010551449286, "grad_norm": 0.049338039564223814, "learning_rate": 6.585268224100533e-05, "loss": 0.8411, "step": 177490 }, { "epoch": 3.11627662002493, "grad_norm": 0.07065650648378875, "learning_rate": 6.584226475613756e-05, "loss": 0.8408, "step": 177500 }, { "epoch": 3.1164521849049316, "grad_norm": 0.05625997273676701, "learning_rate": 6.583184771672508e-05, "loss": 0.8434, "step": 177510 }, { "epoch": 3.116627749784933, "grad_norm": 0.05543963462161017, "learning_rate": 6.582143112289982e-05, "loss": 0.8464, "step": 177520 }, { "epoch": 3.1168033146649345, "grad_norm": 0.04777556372070191, "learning_rate": 6.58110149747938e-05, "loss": 0.8409, "step": 177530 }, { "epoch": 3.116978879544936, "grad_norm": 0.05238845043874038, "learning_rate": 6.580059927253891e-05, "loss": 0.8471, "step": 177540 }, { "epoch": 3.117154444424937, "grad_norm": 0.051129392155283816, "learning_rate": 6.579018401626722e-05, "loss": 0.8296, "step": 177550 }, { "epoch": 3.1173300093049385, "grad_norm": 0.0598766621646147, "learning_rate": 6.577976920611063e-05, "loss": 0.8449, "step": 177560 }, { "epoch": 3.11750557418494, "grad_norm": 0.050334157963618974, "learning_rate": 6.576935484220107e-05, "loss": 0.8444, "step": 177570 }, { "epoch": 3.1176811390649415, "grad_norm": 0.045198543657779075, "learning_rate": 6.575894092467059e-05, "loss": 0.8442, "step": 177580 }, { "epoch": 3.117856703944943, "grad_norm": 0.04941929617447806, "learning_rate": 6.574852745365108e-05, "loss": 0.8393, "step": 177590 }, { "epoch": 3.1180322688249444, "grad_norm": 0.05406099161203175, "learning_rate": 6.573811442927447e-05, "loss": 0.847, "step": 177600 }, { "epoch": 3.1182078337049455, "grad_norm": 0.0549761192786372, "learning_rate": 6.572770185167272e-05, "loss": 0.8371, "step": 177610 }, { "epoch": 3.118383398584947, "grad_norm": 0.042719478552425694, "learning_rate": 6.571728972097774e-05, "loss": 0.8482, "step": 177620 }, { "epoch": 3.1185589634649484, "grad_norm": 0.04548676998799097, "learning_rate": 6.570687803732148e-05, "loss": 0.8449, "step": 177630 }, { "epoch": 3.11873452834495, "grad_norm": 0.05102509185678363, "learning_rate": 6.569646680083578e-05, "loss": 0.841, "step": 177640 }, { "epoch": 3.1189100932249514, "grad_norm": 0.050657791357171196, "learning_rate": 6.568605601165268e-05, "loss": 0.8425, "step": 177650 }, { "epoch": 3.119085658104953, "grad_norm": 0.05272877547268544, "learning_rate": 6.567564566990399e-05, "loss": 0.8492, "step": 177660 }, { "epoch": 3.119261222984954, "grad_norm": 0.0514517185757152, "learning_rate": 6.56652357757216e-05, "loss": 0.8451, "step": 177670 }, { "epoch": 3.1194367878649554, "grad_norm": 0.0549490760175467, "learning_rate": 6.565482632923749e-05, "loss": 0.836, "step": 177680 }, { "epoch": 3.119612352744957, "grad_norm": 0.058762956298762534, "learning_rate": 6.56444173305835e-05, "loss": 0.8447, "step": 177690 }, { "epoch": 3.1197879176249583, "grad_norm": 0.07537230940047412, "learning_rate": 6.563400877989152e-05, "loss": 0.8378, "step": 177700 }, { "epoch": 3.11996348250496, "grad_norm": 0.05858591671875514, "learning_rate": 6.56236006772934e-05, "loss": 0.8352, "step": 177710 }, { "epoch": 3.1201390473849613, "grad_norm": 0.04506413212303719, "learning_rate": 6.561319302292109e-05, "loss": 0.8419, "step": 177720 }, { "epoch": 3.1203146122649623, "grad_norm": 0.06999931293305242, "learning_rate": 6.560278581690638e-05, "loss": 0.8435, "step": 177730 }, { "epoch": 3.120490177144964, "grad_norm": 0.04581097880265455, "learning_rate": 6.559237905938111e-05, "loss": 0.8422, "step": 177740 }, { "epoch": 3.1206657420249653, "grad_norm": 0.057578523798732954, "learning_rate": 6.558197275047725e-05, "loss": 0.8423, "step": 177750 }, { "epoch": 3.1208413069049667, "grad_norm": 0.06674118325822215, "learning_rate": 6.557156689032655e-05, "loss": 0.8423, "step": 177760 }, { "epoch": 3.1210168717849682, "grad_norm": 0.050372967372002725, "learning_rate": 6.556116147906091e-05, "loss": 0.8398, "step": 177770 }, { "epoch": 3.1211924366649697, "grad_norm": 0.04890613674838637, "learning_rate": 6.555075651681213e-05, "loss": 0.8458, "step": 177780 }, { "epoch": 3.1213680015449707, "grad_norm": 0.044581533984415216, "learning_rate": 6.554035200371207e-05, "loss": 0.8457, "step": 177790 }, { "epoch": 3.121543566424972, "grad_norm": 0.050548191870678216, "learning_rate": 6.552994793989255e-05, "loss": 0.8423, "step": 177800 }, { "epoch": 3.1217191313049737, "grad_norm": 0.051049445985347774, "learning_rate": 6.551954432548535e-05, "loss": 0.838, "step": 177810 }, { "epoch": 3.121894696184975, "grad_norm": 0.050643221871884074, "learning_rate": 6.550914116062236e-05, "loss": 0.845, "step": 177820 }, { "epoch": 3.1220702610649766, "grad_norm": 0.07070329059918229, "learning_rate": 6.549873844543536e-05, "loss": 0.8362, "step": 177830 }, { "epoch": 3.122245825944978, "grad_norm": 0.049110603660844736, "learning_rate": 6.54883361800561e-05, "loss": 0.8364, "step": 177840 }, { "epoch": 3.122421390824979, "grad_norm": 0.045021808045287666, "learning_rate": 6.547793436461645e-05, "loss": 0.8443, "step": 177850 }, { "epoch": 3.1225969557049806, "grad_norm": 0.05827316435279269, "learning_rate": 6.54675329992482e-05, "loss": 0.836, "step": 177860 }, { "epoch": 3.122772520584982, "grad_norm": 0.05658196055184611, "learning_rate": 6.545713208408311e-05, "loss": 0.8439, "step": 177870 }, { "epoch": 3.1229480854649836, "grad_norm": 0.06053788321796417, "learning_rate": 6.544673161925296e-05, "loss": 0.8449, "step": 177880 }, { "epoch": 3.123123650344985, "grad_norm": 0.0522538448885441, "learning_rate": 6.543633160488953e-05, "loss": 0.8522, "step": 177890 }, { "epoch": 3.1232992152249865, "grad_norm": 0.0571582676805879, "learning_rate": 6.542593204112461e-05, "loss": 0.8501, "step": 177900 }, { "epoch": 3.1234747801049876, "grad_norm": 0.08569163300346559, "learning_rate": 6.54155329280899e-05, "loss": 0.843, "step": 177910 }, { "epoch": 3.123650344984989, "grad_norm": 0.06206070894049456, "learning_rate": 6.540513426591726e-05, "loss": 0.8423, "step": 177920 }, { "epoch": 3.1238259098649905, "grad_norm": 0.05516956704649832, "learning_rate": 6.539473605473839e-05, "loss": 0.8354, "step": 177930 }, { "epoch": 3.124001474744992, "grad_norm": 0.06073482347575983, "learning_rate": 6.538433829468501e-05, "loss": 0.8391, "step": 177940 }, { "epoch": 3.1241770396249935, "grad_norm": 0.04959690200497376, "learning_rate": 6.53739409858889e-05, "loss": 0.8457, "step": 177950 }, { "epoch": 3.124352604504995, "grad_norm": 0.05311844017994634, "learning_rate": 6.536354412848182e-05, "loss": 0.8353, "step": 177960 }, { "epoch": 3.1245281693849964, "grad_norm": 0.06792895464064776, "learning_rate": 6.535314772259544e-05, "loss": 0.8441, "step": 177970 }, { "epoch": 3.1247037342649975, "grad_norm": 0.04932947436940383, "learning_rate": 6.534275176836148e-05, "loss": 0.8455, "step": 177980 }, { "epoch": 3.124879299144999, "grad_norm": 0.06037963961932143, "learning_rate": 6.533235626591171e-05, "loss": 0.8421, "step": 177990 }, { "epoch": 3.1250548640250004, "grad_norm": 0.05880350181803266, "learning_rate": 6.532196121537785e-05, "loss": 0.8447, "step": 178000 }, { "epoch": 3.125230428905002, "grad_norm": 0.04513031032796769, "learning_rate": 6.531156661689152e-05, "loss": 0.843, "step": 178010 }, { "epoch": 3.1254059937850034, "grad_norm": 0.0466876626317143, "learning_rate": 6.530117247058453e-05, "loss": 0.8487, "step": 178020 }, { "epoch": 3.1255815586650044, "grad_norm": 0.08869849851004087, "learning_rate": 6.529077877658851e-05, "loss": 0.8406, "step": 178030 }, { "epoch": 3.125757123545006, "grad_norm": 0.052984688313148974, "learning_rate": 6.528038553503517e-05, "loss": 0.8443, "step": 178040 }, { "epoch": 3.1259326884250074, "grad_norm": 0.05291178664632654, "learning_rate": 6.526999274605618e-05, "loss": 0.8459, "step": 178050 }, { "epoch": 3.126108253305009, "grad_norm": 0.0575801899515457, "learning_rate": 6.525960040978324e-05, "loss": 0.8409, "step": 178060 }, { "epoch": 3.1262838181850103, "grad_norm": 0.04974705639487808, "learning_rate": 6.524920852634802e-05, "loss": 0.8424, "step": 178070 }, { "epoch": 3.126459383065012, "grad_norm": 0.05101300547258843, "learning_rate": 6.523881709588214e-05, "loss": 0.8387, "step": 178080 }, { "epoch": 3.1266349479450133, "grad_norm": 0.05515173488164023, "learning_rate": 6.522842611851734e-05, "loss": 0.8341, "step": 178090 }, { "epoch": 3.1268105128250143, "grad_norm": 0.053378702734042466, "learning_rate": 6.521803559438524e-05, "loss": 0.8462, "step": 178100 }, { "epoch": 3.126986077705016, "grad_norm": 0.05772258034386112, "learning_rate": 6.520764552361746e-05, "loss": 0.8401, "step": 178110 }, { "epoch": 3.1271616425850173, "grad_norm": 0.04355293450467232, "learning_rate": 6.51972559063457e-05, "loss": 0.8387, "step": 178120 }, { "epoch": 3.1273372074650188, "grad_norm": 0.05455392987846172, "learning_rate": 6.518686674270156e-05, "loss": 0.85, "step": 178130 }, { "epoch": 3.1275127723450202, "grad_norm": 0.0483368953028399, "learning_rate": 6.517647803281667e-05, "loss": 0.8459, "step": 178140 }, { "epoch": 3.1276883372250217, "grad_norm": 0.048856275946127944, "learning_rate": 6.516608977682267e-05, "loss": 0.8344, "step": 178150 }, { "epoch": 3.1278639021050227, "grad_norm": 0.057198972135459145, "learning_rate": 6.51557019748512e-05, "loss": 0.8458, "step": 178160 }, { "epoch": 3.128039466985024, "grad_norm": 0.047307026355425094, "learning_rate": 6.514531462703383e-05, "loss": 0.8408, "step": 178170 }, { "epoch": 3.1282150318650257, "grad_norm": 0.05284158573568351, "learning_rate": 6.513492773350218e-05, "loss": 0.8373, "step": 178180 }, { "epoch": 3.128390596745027, "grad_norm": 0.053462496863110165, "learning_rate": 6.512454129438792e-05, "loss": 0.8529, "step": 178190 }, { "epoch": 3.1285661616250287, "grad_norm": 0.06871051625540021, "learning_rate": 6.511415530982258e-05, "loss": 0.8448, "step": 178200 }, { "epoch": 3.12874172650503, "grad_norm": 0.05140171140299039, "learning_rate": 6.510376977993777e-05, "loss": 0.8431, "step": 178210 }, { "epoch": 3.128917291385031, "grad_norm": 0.06772899811762499, "learning_rate": 6.509338470486506e-05, "loss": 0.8362, "step": 178220 }, { "epoch": 3.1290928562650326, "grad_norm": 0.07046765189879099, "learning_rate": 6.508300008473605e-05, "loss": 0.841, "step": 178230 }, { "epoch": 3.129268421145034, "grad_norm": 0.056475976777573156, "learning_rate": 6.507261591968232e-05, "loss": 0.8404, "step": 178240 }, { "epoch": 3.1294439860250356, "grad_norm": 0.06777508874136066, "learning_rate": 6.506223220983541e-05, "loss": 0.8494, "step": 178250 }, { "epoch": 3.129619550905037, "grad_norm": 0.045103898504592135, "learning_rate": 6.505184895532692e-05, "loss": 0.8446, "step": 178260 }, { "epoch": 3.1297951157850386, "grad_norm": 0.06302107889963031, "learning_rate": 6.504146615628843e-05, "loss": 0.8358, "step": 178270 }, { "epoch": 3.1299706806650396, "grad_norm": 0.044531480238633056, "learning_rate": 6.503108381285139e-05, "loss": 0.8396, "step": 178280 }, { "epoch": 3.130146245545041, "grad_norm": 0.06056271662238985, "learning_rate": 6.502070192514745e-05, "loss": 0.8424, "step": 178290 }, { "epoch": 3.1303218104250425, "grad_norm": 0.052644724285929444, "learning_rate": 6.501032049330812e-05, "loss": 0.8519, "step": 178300 }, { "epoch": 3.130497375305044, "grad_norm": 0.04584034447418287, "learning_rate": 6.499993951746492e-05, "loss": 0.8478, "step": 178310 }, { "epoch": 3.1306729401850455, "grad_norm": 0.06682958527392269, "learning_rate": 6.498955899774937e-05, "loss": 0.839, "step": 178320 }, { "epoch": 3.130848505065047, "grad_norm": 0.0524801982783204, "learning_rate": 6.497917893429303e-05, "loss": 0.8477, "step": 178330 }, { "epoch": 3.131024069945048, "grad_norm": 0.05587078694312685, "learning_rate": 6.49687993272274e-05, "loss": 0.8404, "step": 178340 }, { "epoch": 3.1311996348250495, "grad_norm": 0.06338676452876384, "learning_rate": 6.495842017668393e-05, "loss": 0.8339, "step": 178350 }, { "epoch": 3.131375199705051, "grad_norm": 0.055832188668211, "learning_rate": 6.494804148279424e-05, "loss": 0.8395, "step": 178360 }, { "epoch": 3.1315507645850524, "grad_norm": 0.05683933296879574, "learning_rate": 6.493766324568977e-05, "loss": 0.8456, "step": 178370 }, { "epoch": 3.131726329465054, "grad_norm": 0.057608682793735315, "learning_rate": 6.492728546550201e-05, "loss": 0.8437, "step": 178380 }, { "epoch": 3.1319018943450554, "grad_norm": 0.05250791383340115, "learning_rate": 6.491690814236244e-05, "loss": 0.8473, "step": 178390 }, { "epoch": 3.1320774592250564, "grad_norm": 0.05302478241963316, "learning_rate": 6.490653127640258e-05, "loss": 0.8363, "step": 178400 }, { "epoch": 3.132253024105058, "grad_norm": 0.054861884203445614, "learning_rate": 6.489615486775389e-05, "loss": 0.8402, "step": 178410 }, { "epoch": 3.1324285889850594, "grad_norm": 0.05675549303012766, "learning_rate": 6.48857789165478e-05, "loss": 0.8524, "step": 178420 }, { "epoch": 3.132604153865061, "grad_norm": 0.046365543689716654, "learning_rate": 6.487540342291585e-05, "loss": 0.8389, "step": 178430 }, { "epoch": 3.1327797187450623, "grad_norm": 0.05257065909712258, "learning_rate": 6.486502838698947e-05, "loss": 0.8362, "step": 178440 }, { "epoch": 3.132955283625064, "grad_norm": 0.054202935245921804, "learning_rate": 6.485465380890006e-05, "loss": 0.8396, "step": 178450 }, { "epoch": 3.133130848505065, "grad_norm": 0.04899213709845828, "learning_rate": 6.484427968877914e-05, "loss": 0.8492, "step": 178460 }, { "epoch": 3.1333064133850663, "grad_norm": 0.053653682023365123, "learning_rate": 6.483390602675814e-05, "loss": 0.8447, "step": 178470 }, { "epoch": 3.133481978265068, "grad_norm": 0.04508661937337517, "learning_rate": 6.482353282296848e-05, "loss": 0.8473, "step": 178480 }, { "epoch": 3.1336575431450693, "grad_norm": 0.046944902968170575, "learning_rate": 6.481316007754156e-05, "loss": 0.8453, "step": 178490 }, { "epoch": 3.1338331080250708, "grad_norm": 0.059019728002192344, "learning_rate": 6.480278779060888e-05, "loss": 0.8462, "step": 178500 }, { "epoch": 3.1340086729050722, "grad_norm": 0.07229071392889384, "learning_rate": 6.479241596230179e-05, "loss": 0.8453, "step": 178510 }, { "epoch": 3.1341842377850733, "grad_norm": 0.05474900694994087, "learning_rate": 6.47820445927517e-05, "loss": 0.8398, "step": 178520 }, { "epoch": 3.1343598026650747, "grad_norm": 0.04375012058767881, "learning_rate": 6.477167368209009e-05, "loss": 0.8458, "step": 178530 }, { "epoch": 3.1345353675450762, "grad_norm": 0.08876199821895149, "learning_rate": 6.47613032304483e-05, "loss": 0.8432, "step": 178540 }, { "epoch": 3.1347109324250777, "grad_norm": 0.04834687477612056, "learning_rate": 6.475093323795777e-05, "loss": 0.8395, "step": 178550 }, { "epoch": 3.134886497305079, "grad_norm": 0.056333186252766565, "learning_rate": 6.474056370474981e-05, "loss": 0.8432, "step": 178560 }, { "epoch": 3.1350620621850807, "grad_norm": 0.058316199963526175, "learning_rate": 6.473019463095591e-05, "loss": 0.8367, "step": 178570 }, { "epoch": 3.1352376270650817, "grad_norm": 0.07108979025378816, "learning_rate": 6.471982601670737e-05, "loss": 0.8483, "step": 178580 }, { "epoch": 3.135413191945083, "grad_norm": 0.05725074094522768, "learning_rate": 6.470945786213557e-05, "loss": 0.8467, "step": 178590 }, { "epoch": 3.1355887568250846, "grad_norm": 0.0473667105283192, "learning_rate": 6.469909016737193e-05, "loss": 0.8432, "step": 178600 }, { "epoch": 3.135764321705086, "grad_norm": 0.05539484821659648, "learning_rate": 6.468872293254777e-05, "loss": 0.8467, "step": 178610 }, { "epoch": 3.1359398865850876, "grad_norm": 0.047388617723978704, "learning_rate": 6.467835615779441e-05, "loss": 0.8433, "step": 178620 }, { "epoch": 3.136115451465089, "grad_norm": 0.054421660295673496, "learning_rate": 6.466798984324327e-05, "loss": 0.8415, "step": 178630 }, { "epoch": 3.1362910163450906, "grad_norm": 0.07174169831441726, "learning_rate": 6.465762398902567e-05, "loss": 0.8405, "step": 178640 }, { "epoch": 3.1364665812250916, "grad_norm": 0.05874783902764889, "learning_rate": 6.464725859527294e-05, "loss": 0.847, "step": 178650 }, { "epoch": 3.136642146105093, "grad_norm": 0.0411629571461877, "learning_rate": 6.463689366211639e-05, "loss": 0.8461, "step": 178660 }, { "epoch": 3.1368177109850945, "grad_norm": 0.04513011414921116, "learning_rate": 6.462652918968738e-05, "loss": 0.8438, "step": 178670 }, { "epoch": 3.136993275865096, "grad_norm": 0.0570281454505495, "learning_rate": 6.461616517811722e-05, "loss": 0.842, "step": 178680 }, { "epoch": 3.1371688407450975, "grad_norm": 0.04561104062250132, "learning_rate": 6.460580162753717e-05, "loss": 0.8472, "step": 178690 }, { "epoch": 3.1373444056250985, "grad_norm": 0.05495913863318287, "learning_rate": 6.459543853807864e-05, "loss": 0.8453, "step": 178700 }, { "epoch": 3.1375199705051, "grad_norm": 0.057362467106024, "learning_rate": 6.458507590987289e-05, "loss": 0.8383, "step": 178710 }, { "epoch": 3.1376955353851015, "grad_norm": 0.05580102352375802, "learning_rate": 6.457471374305119e-05, "loss": 0.8385, "step": 178720 }, { "epoch": 3.137871100265103, "grad_norm": 0.05275123280012451, "learning_rate": 6.456435203774484e-05, "loss": 0.8393, "step": 178730 }, { "epoch": 3.1380466651451044, "grad_norm": 0.047782537132588156, "learning_rate": 6.455399079408513e-05, "loss": 0.8386, "step": 178740 }, { "epoch": 3.138222230025106, "grad_norm": 0.05249016959511997, "learning_rate": 6.454363001220336e-05, "loss": 0.851, "step": 178750 }, { "epoch": 3.1383977949051074, "grad_norm": 0.04659148616115913, "learning_rate": 6.453326969223073e-05, "loss": 0.8357, "step": 178760 }, { "epoch": 3.1385733597851084, "grad_norm": 0.044996624513579955, "learning_rate": 6.452290983429863e-05, "loss": 0.8423, "step": 178770 }, { "epoch": 3.13874892466511, "grad_norm": 0.08138170183437587, "learning_rate": 6.451255043853825e-05, "loss": 0.841, "step": 178780 }, { "epoch": 3.1389244895451114, "grad_norm": 0.05460043834890342, "learning_rate": 6.45021915050808e-05, "loss": 0.8443, "step": 178790 }, { "epoch": 3.139100054425113, "grad_norm": 0.046086601702064474, "learning_rate": 6.44918330340576e-05, "loss": 0.8427, "step": 178800 }, { "epoch": 3.1392756193051143, "grad_norm": 0.05081017724090852, "learning_rate": 6.448147502559989e-05, "loss": 0.845, "step": 178810 }, { "epoch": 3.139451184185116, "grad_norm": 0.06520915132643486, "learning_rate": 6.44711174798389e-05, "loss": 0.8415, "step": 178820 }, { "epoch": 3.139626749065117, "grad_norm": 0.06073759458287094, "learning_rate": 6.44607603969058e-05, "loss": 0.8322, "step": 178830 }, { "epoch": 3.1398023139451183, "grad_norm": 0.056793542917856155, "learning_rate": 6.445040377693193e-05, "loss": 0.8451, "step": 178840 }, { "epoch": 3.13997787882512, "grad_norm": 0.04692638731246804, "learning_rate": 6.444004762004842e-05, "loss": 0.8382, "step": 178850 }, { "epoch": 3.1401534437051213, "grad_norm": 0.055049361543355775, "learning_rate": 6.442969192638647e-05, "loss": 0.8408, "step": 178860 }, { "epoch": 3.1403290085851228, "grad_norm": 0.04727303907667797, "learning_rate": 6.441933669607739e-05, "loss": 0.8477, "step": 178870 }, { "epoch": 3.1405045734651242, "grad_norm": 0.06422244986003404, "learning_rate": 6.440898192925232e-05, "loss": 0.848, "step": 178880 }, { "epoch": 3.1406801383451253, "grad_norm": 0.07494289049452428, "learning_rate": 6.439862762604244e-05, "loss": 0.8459, "step": 178890 }, { "epoch": 3.1408557032251267, "grad_norm": 0.047421725440594815, "learning_rate": 6.438827378657897e-05, "loss": 0.846, "step": 178900 }, { "epoch": 3.1410312681051282, "grad_norm": 0.05784004952017789, "learning_rate": 6.437792041099309e-05, "loss": 0.8452, "step": 178910 }, { "epoch": 3.1412068329851297, "grad_norm": 0.058854750341155115, "learning_rate": 6.436756749941598e-05, "loss": 0.8451, "step": 178920 }, { "epoch": 3.141382397865131, "grad_norm": 0.05827025082622834, "learning_rate": 6.43572150519788e-05, "loss": 0.8422, "step": 178930 }, { "epoch": 3.1415579627451327, "grad_norm": 0.052495763461346254, "learning_rate": 6.434686306881273e-05, "loss": 0.8352, "step": 178940 }, { "epoch": 3.1417335276251337, "grad_norm": 0.05647728291398512, "learning_rate": 6.433651155004895e-05, "loss": 0.8403, "step": 178950 }, { "epoch": 3.141909092505135, "grad_norm": 0.052186362278243116, "learning_rate": 6.432616049581856e-05, "loss": 0.8404, "step": 178960 }, { "epoch": 3.1420846573851366, "grad_norm": 0.0609229412661522, "learning_rate": 6.431580990625276e-05, "loss": 0.843, "step": 178970 }, { "epoch": 3.142260222265138, "grad_norm": 0.06077849000626897, "learning_rate": 6.43054597814827e-05, "loss": 0.837, "step": 178980 }, { "epoch": 3.1424357871451396, "grad_norm": 0.06995258551475193, "learning_rate": 6.429511012163949e-05, "loss": 0.8476, "step": 178990 }, { "epoch": 3.142611352025141, "grad_norm": 0.06472802178396476, "learning_rate": 6.428476092685425e-05, "loss": 0.8312, "step": 179000 }, { "epoch": 3.142786916905142, "grad_norm": 0.05504365584092887, "learning_rate": 6.427441219725815e-05, "loss": 0.8371, "step": 179010 }, { "epoch": 3.1429624817851436, "grad_norm": 0.05556149318751837, "learning_rate": 6.426406393298226e-05, "loss": 0.8341, "step": 179020 }, { "epoch": 3.143138046665145, "grad_norm": 0.040136408992915644, "learning_rate": 6.425371613415771e-05, "loss": 0.8338, "step": 179030 }, { "epoch": 3.1433136115451465, "grad_norm": 0.07215797935606835, "learning_rate": 6.424336880091564e-05, "loss": 0.845, "step": 179040 }, { "epoch": 3.143489176425148, "grad_norm": 0.05428939529380394, "learning_rate": 6.423302193338715e-05, "loss": 0.8499, "step": 179050 }, { "epoch": 3.1436647413051495, "grad_norm": 0.05263506993872728, "learning_rate": 6.422267553170329e-05, "loss": 0.8396, "step": 179060 }, { "epoch": 3.1438403061851505, "grad_norm": 0.05414221175979501, "learning_rate": 6.42123295959952e-05, "loss": 0.8428, "step": 179070 }, { "epoch": 3.144015871065152, "grad_norm": 0.058732870138231384, "learning_rate": 6.420198412639395e-05, "loss": 0.8376, "step": 179080 }, { "epoch": 3.1441914359451535, "grad_norm": 0.054402683194074083, "learning_rate": 6.419163912303059e-05, "loss": 0.8354, "step": 179090 }, { "epoch": 3.144367000825155, "grad_norm": 0.051207716253714494, "learning_rate": 6.418129458603619e-05, "loss": 0.8424, "step": 179100 }, { "epoch": 3.1445425657051564, "grad_norm": 0.07356963151404143, "learning_rate": 6.417095051554189e-05, "loss": 0.8481, "step": 179110 }, { "epoch": 3.144718130585158, "grad_norm": 0.05333974481641849, "learning_rate": 6.416060691167872e-05, "loss": 0.8416, "step": 179120 }, { "epoch": 3.144893695465159, "grad_norm": 0.052742192577020675, "learning_rate": 6.415026377457767e-05, "loss": 0.8371, "step": 179130 }, { "epoch": 3.1450692603451604, "grad_norm": 0.06651033098947766, "learning_rate": 6.413992110436986e-05, "loss": 0.8368, "step": 179140 }, { "epoch": 3.145244825225162, "grad_norm": 0.05642497347534963, "learning_rate": 6.412957890118633e-05, "loss": 0.8384, "step": 179150 }, { "epoch": 3.1454203901051634, "grad_norm": 0.07430361949388749, "learning_rate": 6.411923716515809e-05, "loss": 0.8368, "step": 179160 }, { "epoch": 3.145595954985165, "grad_norm": 0.04400441129508039, "learning_rate": 6.410889589641618e-05, "loss": 0.8477, "step": 179170 }, { "epoch": 3.1457715198651663, "grad_norm": 0.042755051733279315, "learning_rate": 6.409855509509163e-05, "loss": 0.8435, "step": 179180 }, { "epoch": 3.1459470847451674, "grad_norm": 0.06702723955601185, "learning_rate": 6.408821476131546e-05, "loss": 0.8353, "step": 179190 }, { "epoch": 3.146122649625169, "grad_norm": 0.05329531883500681, "learning_rate": 6.407787489521867e-05, "loss": 0.8432, "step": 179200 }, { "epoch": 3.1462982145051703, "grad_norm": 0.047773650294997236, "learning_rate": 6.40675354969323e-05, "loss": 0.845, "step": 179210 }, { "epoch": 3.146473779385172, "grad_norm": 0.05822065857073286, "learning_rate": 6.405719656658733e-05, "loss": 0.8461, "step": 179220 }, { "epoch": 3.1466493442651733, "grad_norm": 0.04761168140264076, "learning_rate": 6.404685810431478e-05, "loss": 0.8344, "step": 179230 }, { "epoch": 3.1468249091451748, "grad_norm": 0.0428686081982245, "learning_rate": 6.403652011024558e-05, "loss": 0.8411, "step": 179240 }, { "epoch": 3.147000474025176, "grad_norm": 0.04795499080781019, "learning_rate": 6.402618258451077e-05, "loss": 0.8369, "step": 179250 }, { "epoch": 3.1471760389051773, "grad_norm": 0.048448829765550906, "learning_rate": 6.401584552724132e-05, "loss": 0.8437, "step": 179260 }, { "epoch": 3.1473516037851788, "grad_norm": 0.05509264792456271, "learning_rate": 6.400550893856816e-05, "loss": 0.8485, "step": 179270 }, { "epoch": 3.1475271686651802, "grad_norm": 0.05611725235717654, "learning_rate": 6.399517281862233e-05, "loss": 0.8455, "step": 179280 }, { "epoch": 3.1477027335451817, "grad_norm": 0.046597549023473406, "learning_rate": 6.398483716753479e-05, "loss": 0.8438, "step": 179290 }, { "epoch": 3.147878298425183, "grad_norm": 0.08295332716009504, "learning_rate": 6.397450198543638e-05, "loss": 0.8344, "step": 179300 }, { "epoch": 3.1480538633051847, "grad_norm": 0.05034785425743946, "learning_rate": 6.396416727245816e-05, "loss": 0.8517, "step": 179310 }, { "epoch": 3.1482294281851857, "grad_norm": 0.044171440571531, "learning_rate": 6.395383302873104e-05, "loss": 0.8405, "step": 179320 }, { "epoch": 3.148404993065187, "grad_norm": 0.05020234400837109, "learning_rate": 6.394349925438597e-05, "loss": 0.8436, "step": 179330 }, { "epoch": 3.1485805579451887, "grad_norm": 0.05161970981505903, "learning_rate": 6.393316594955382e-05, "loss": 0.8412, "step": 179340 }, { "epoch": 3.14875612282519, "grad_norm": 0.0547235188590037, "learning_rate": 6.39228331143656e-05, "loss": 0.8465, "step": 179350 }, { "epoch": 3.1489316877051916, "grad_norm": 0.04889940475593968, "learning_rate": 6.391250074895217e-05, "loss": 0.8379, "step": 179360 }, { "epoch": 3.1491072525851926, "grad_norm": 0.05451759078277569, "learning_rate": 6.390216885344445e-05, "loss": 0.8424, "step": 179370 }, { "epoch": 3.149282817465194, "grad_norm": 0.05202645454684516, "learning_rate": 6.389183742797339e-05, "loss": 0.8357, "step": 179380 }, { "epoch": 3.1494583823451956, "grad_norm": 0.05241556934800756, "learning_rate": 6.388150647266985e-05, "loss": 0.8481, "step": 179390 }, { "epoch": 3.149633947225197, "grad_norm": 0.06045961485317918, "learning_rate": 6.387117598766476e-05, "loss": 0.8378, "step": 179400 }, { "epoch": 3.1498095121051986, "grad_norm": 0.06602853791045067, "learning_rate": 6.386084597308896e-05, "loss": 0.8468, "step": 179410 }, { "epoch": 3.1499850769852, "grad_norm": 0.048580587127177804, "learning_rate": 6.385051642907337e-05, "loss": 0.8441, "step": 179420 }, { "epoch": 3.1501606418652015, "grad_norm": 0.048875359271861464, "learning_rate": 6.384018735574887e-05, "loss": 0.8414, "step": 179430 }, { "epoch": 3.1503362067452025, "grad_norm": 0.07266881317918883, "learning_rate": 6.382985875324626e-05, "loss": 0.8392, "step": 179440 }, { "epoch": 3.150511771625204, "grad_norm": 0.0561331223402171, "learning_rate": 6.381953062169652e-05, "loss": 0.841, "step": 179450 }, { "epoch": 3.1506873365052055, "grad_norm": 0.05526824443792002, "learning_rate": 6.380920296123045e-05, "loss": 0.8399, "step": 179460 }, { "epoch": 3.150862901385207, "grad_norm": 0.05029794998574817, "learning_rate": 6.379887577197886e-05, "loss": 0.8377, "step": 179470 }, { "epoch": 3.1510384662652084, "grad_norm": 0.055607824227515965, "learning_rate": 6.37885490540727e-05, "loss": 0.8365, "step": 179480 }, { "epoch": 3.1512140311452095, "grad_norm": 0.09091493454644883, "learning_rate": 6.377822280764272e-05, "loss": 0.8345, "step": 179490 }, { "epoch": 3.151389596025211, "grad_norm": 0.06431042537821893, "learning_rate": 6.376789703281981e-05, "loss": 0.8329, "step": 179500 }, { "epoch": 3.1515651609052124, "grad_norm": 0.047913979775700395, "learning_rate": 6.375757172973477e-05, "loss": 0.8429, "step": 179510 }, { "epoch": 3.151740725785214, "grad_norm": 0.06256844542368858, "learning_rate": 6.374724689851845e-05, "loss": 0.8384, "step": 179520 }, { "epoch": 3.1519162906652154, "grad_norm": 0.04535143417856363, "learning_rate": 6.373692253930163e-05, "loss": 0.8515, "step": 179530 }, { "epoch": 3.152091855545217, "grad_norm": 0.055138511051176946, "learning_rate": 6.372659865221511e-05, "loss": 0.8393, "step": 179540 }, { "epoch": 3.1522674204252183, "grad_norm": 0.05432848103272801, "learning_rate": 6.371627523738979e-05, "loss": 0.8469, "step": 179550 }, { "epoch": 3.1524429853052194, "grad_norm": 0.057076011055602914, "learning_rate": 6.370595229495638e-05, "loss": 0.8388, "step": 179560 }, { "epoch": 3.152618550185221, "grad_norm": 0.05352763153660033, "learning_rate": 6.369562982504572e-05, "loss": 0.8449, "step": 179570 }, { "epoch": 3.1527941150652223, "grad_norm": 0.05382227035672381, "learning_rate": 6.368530782778857e-05, "loss": 0.847, "step": 179580 }, { "epoch": 3.152969679945224, "grad_norm": 0.06792562622809295, "learning_rate": 6.367498630331573e-05, "loss": 0.8455, "step": 179590 }, { "epoch": 3.1531452448252253, "grad_norm": 0.05964740227212663, "learning_rate": 6.366466525175794e-05, "loss": 0.8371, "step": 179600 }, { "epoch": 3.1533208097052268, "grad_norm": 0.04635053777027475, "learning_rate": 6.365434467324598e-05, "loss": 0.8401, "step": 179610 }, { "epoch": 3.153496374585228, "grad_norm": 0.06475959620330492, "learning_rate": 6.364402456791067e-05, "loss": 0.8376, "step": 179620 }, { "epoch": 3.1536719394652293, "grad_norm": 0.05649088765350369, "learning_rate": 6.363370493588276e-05, "loss": 0.8411, "step": 179630 }, { "epoch": 3.1538475043452308, "grad_norm": 0.06404285603878118, "learning_rate": 6.362338577729289e-05, "loss": 0.8473, "step": 179640 }, { "epoch": 3.1540230692252322, "grad_norm": 0.04986700338782178, "learning_rate": 6.361306709227193e-05, "loss": 0.8378, "step": 179650 }, { "epoch": 3.1541986341052337, "grad_norm": 0.06121834183212604, "learning_rate": 6.36027488809506e-05, "loss": 0.8332, "step": 179660 }, { "epoch": 3.154374198985235, "grad_norm": 0.05113835253916613, "learning_rate": 6.359243114345957e-05, "loss": 0.8361, "step": 179670 }, { "epoch": 3.1545497638652362, "grad_norm": 0.06353809392727275, "learning_rate": 6.358211387992959e-05, "loss": 0.8424, "step": 179680 }, { "epoch": 3.1547253287452377, "grad_norm": 0.059267217563659165, "learning_rate": 6.357179709049145e-05, "loss": 0.8345, "step": 179690 }, { "epoch": 3.154900893625239, "grad_norm": 0.05138101033357843, "learning_rate": 6.356148077527579e-05, "loss": 0.8499, "step": 179700 }, { "epoch": 3.1550764585052407, "grad_norm": 0.04755272296793122, "learning_rate": 6.355116493441331e-05, "loss": 0.8425, "step": 179710 }, { "epoch": 3.155252023385242, "grad_norm": 0.049912330167360686, "learning_rate": 6.354084956803478e-05, "loss": 0.8439, "step": 179720 }, { "epoch": 3.1554275882652436, "grad_norm": 0.049771245013577894, "learning_rate": 6.353053467627087e-05, "loss": 0.837, "step": 179730 }, { "epoch": 3.1556031531452446, "grad_norm": 0.05311701426058319, "learning_rate": 6.352022025925226e-05, "loss": 0.8402, "step": 179740 }, { "epoch": 3.155778718025246, "grad_norm": 0.04912449477067914, "learning_rate": 6.350990631710965e-05, "loss": 0.8317, "step": 179750 }, { "epoch": 3.1559542829052476, "grad_norm": 0.05983226396313665, "learning_rate": 6.34995928499737e-05, "loss": 0.837, "step": 179760 }, { "epoch": 3.156129847785249, "grad_norm": 0.061328086678976004, "learning_rate": 6.34892798579751e-05, "loss": 0.8403, "step": 179770 }, { "epoch": 3.1563054126652506, "grad_norm": 0.04998998909930003, "learning_rate": 6.347896734124449e-05, "loss": 0.8406, "step": 179780 }, { "epoch": 3.156480977545252, "grad_norm": 0.04955459625168574, "learning_rate": 6.346865529991258e-05, "loss": 0.8448, "step": 179790 }, { "epoch": 3.156656542425253, "grad_norm": 0.06202081837031431, "learning_rate": 6.345834373411004e-05, "loss": 0.8465, "step": 179800 }, { "epoch": 3.1568321073052545, "grad_norm": 0.07046672853026174, "learning_rate": 6.34480326439674e-05, "loss": 0.839, "step": 179810 }, { "epoch": 3.157007672185256, "grad_norm": 0.04876546825870116, "learning_rate": 6.343772202961543e-05, "loss": 0.8463, "step": 179820 }, { "epoch": 3.1571832370652575, "grad_norm": 0.05353984501547024, "learning_rate": 6.342741189118472e-05, "loss": 0.8399, "step": 179830 }, { "epoch": 3.157358801945259, "grad_norm": 0.06360065985506613, "learning_rate": 6.341710222880588e-05, "loss": 0.8438, "step": 179840 }, { "epoch": 3.1575343668252605, "grad_norm": 0.04984387626493659, "learning_rate": 6.340679304260956e-05, "loss": 0.8377, "step": 179850 }, { "epoch": 3.1577099317052615, "grad_norm": 0.06014650594962761, "learning_rate": 6.339648433272642e-05, "loss": 0.8463, "step": 179860 }, { "epoch": 3.157885496585263, "grad_norm": 0.06570244686012702, "learning_rate": 6.3386176099287e-05, "loss": 0.8461, "step": 179870 }, { "epoch": 3.1580610614652644, "grad_norm": 0.05302085122908752, "learning_rate": 6.337586834242189e-05, "loss": 0.8352, "step": 179880 }, { "epoch": 3.158236626345266, "grad_norm": 0.044332997665070505, "learning_rate": 6.33655610622618e-05, "loss": 0.8319, "step": 179890 }, { "epoch": 3.1584121912252674, "grad_norm": 0.051084365830957615, "learning_rate": 6.335525425893725e-05, "loss": 0.8443, "step": 179900 }, { "epoch": 3.158587756105269, "grad_norm": 0.06715008835809087, "learning_rate": 6.334494793257885e-05, "loss": 0.8344, "step": 179910 }, { "epoch": 3.15876332098527, "grad_norm": 0.05504110657395569, "learning_rate": 6.333464208331717e-05, "loss": 0.8325, "step": 179920 }, { "epoch": 3.1589388858652714, "grad_norm": 0.055172892807333396, "learning_rate": 6.332433671128279e-05, "loss": 0.8402, "step": 179930 }, { "epoch": 3.159114450745273, "grad_norm": 0.04910433914329564, "learning_rate": 6.331403181660629e-05, "loss": 0.8347, "step": 179940 }, { "epoch": 3.1592900156252743, "grad_norm": 0.047886136068208546, "learning_rate": 6.330372739941819e-05, "loss": 0.8403, "step": 179950 }, { "epoch": 3.159465580505276, "grad_norm": 0.045145246765841134, "learning_rate": 6.329342345984913e-05, "loss": 0.8413, "step": 179960 }, { "epoch": 3.1596411453852773, "grad_norm": 0.061582889259715216, "learning_rate": 6.328311999802964e-05, "loss": 0.8456, "step": 179970 }, { "epoch": 3.1598167102652783, "grad_norm": 0.046387992353655975, "learning_rate": 6.32728170140902e-05, "loss": 0.845, "step": 179980 }, { "epoch": 3.15999227514528, "grad_norm": 0.05440440243678406, "learning_rate": 6.326251450816142e-05, "loss": 0.8352, "step": 179990 }, { "epoch": 3.1601678400252813, "grad_norm": 0.047461512671409196, "learning_rate": 6.325221248037381e-05, "loss": 0.8485, "step": 180000 }, { "epoch": 3.1603434049052828, "grad_norm": 0.0518894029551176, "learning_rate": 6.324191093085791e-05, "loss": 0.8348, "step": 180010 }, { "epoch": 3.1605189697852842, "grad_norm": 0.04339280603388512, "learning_rate": 6.323160985974424e-05, "loss": 0.8421, "step": 180020 }, { "epoch": 3.1606945346652857, "grad_norm": 0.06725375397048222, "learning_rate": 6.322130926716331e-05, "loss": 0.8411, "step": 180030 }, { "epoch": 3.1608700995452867, "grad_norm": 0.045162511370106594, "learning_rate": 6.321100915324563e-05, "loss": 0.8348, "step": 180040 }, { "epoch": 3.1610456644252882, "grad_norm": 0.05104739015070901, "learning_rate": 6.320070951812167e-05, "loss": 0.8403, "step": 180050 }, { "epoch": 3.1612212293052897, "grad_norm": 0.05708030093043129, "learning_rate": 6.3190410361922e-05, "loss": 0.8378, "step": 180060 }, { "epoch": 3.161396794185291, "grad_norm": 0.07278597025012958, "learning_rate": 6.318011168477708e-05, "loss": 0.846, "step": 180070 }, { "epoch": 3.1615723590652927, "grad_norm": 0.060195178224976255, "learning_rate": 6.31698134868174e-05, "loss": 0.8516, "step": 180080 }, { "epoch": 3.161747923945294, "grad_norm": 0.052905326410571815, "learning_rate": 6.31595157681734e-05, "loss": 0.8385, "step": 180090 }, { "epoch": 3.1619234888252956, "grad_norm": 0.051875134896985475, "learning_rate": 6.314921852897562e-05, "loss": 0.8452, "step": 180100 }, { "epoch": 3.1620990537052966, "grad_norm": 0.04464887404773847, "learning_rate": 6.31389217693545e-05, "loss": 0.8347, "step": 180110 }, { "epoch": 3.162274618585298, "grad_norm": 0.04557875224433096, "learning_rate": 6.312862548944043e-05, "loss": 0.8394, "step": 180120 }, { "epoch": 3.1624501834652996, "grad_norm": 0.055086357200098426, "learning_rate": 6.311832968936399e-05, "loss": 0.8398, "step": 180130 }, { "epoch": 3.162625748345301, "grad_norm": 0.08712715535457383, "learning_rate": 6.31080343692556e-05, "loss": 0.8418, "step": 180140 }, { "epoch": 3.1628013132253026, "grad_norm": 0.046510288974922424, "learning_rate": 6.309773952924563e-05, "loss": 0.8345, "step": 180150 }, { "epoch": 3.1629768781053036, "grad_norm": 0.045026370182620436, "learning_rate": 6.308744516946455e-05, "loss": 0.8389, "step": 180160 }, { "epoch": 3.163152442985305, "grad_norm": 0.05958439175854949, "learning_rate": 6.307715129004285e-05, "loss": 0.8383, "step": 180170 }, { "epoch": 3.1633280078653065, "grad_norm": 0.058576582909534713, "learning_rate": 6.30668578911109e-05, "loss": 0.8412, "step": 180180 }, { "epoch": 3.163503572745308, "grad_norm": 0.05955069593397579, "learning_rate": 6.305656497279911e-05, "loss": 0.8506, "step": 180190 }, { "epoch": 3.1636791376253095, "grad_norm": 0.04490041517342094, "learning_rate": 6.304627253523793e-05, "loss": 0.8386, "step": 180200 }, { "epoch": 3.163854702505311, "grad_norm": 0.08534248557877444, "learning_rate": 6.303598057855775e-05, "loss": 0.8441, "step": 180210 }, { "epoch": 3.1640302673853125, "grad_norm": 0.05748936368876836, "learning_rate": 6.302568910288894e-05, "loss": 0.8401, "step": 180220 }, { "epoch": 3.1642058322653135, "grad_norm": 0.055979303012549636, "learning_rate": 6.301539810836196e-05, "loss": 0.8427, "step": 180230 }, { "epoch": 3.164381397145315, "grad_norm": 0.04943045738578314, "learning_rate": 6.300510759510716e-05, "loss": 0.8405, "step": 180240 }, { "epoch": 3.1645569620253164, "grad_norm": 0.06674567482082877, "learning_rate": 6.299481756325494e-05, "loss": 0.8469, "step": 180250 }, { "epoch": 3.164732526905318, "grad_norm": 0.048084340339558634, "learning_rate": 6.298452801293564e-05, "loss": 0.8371, "step": 180260 }, { "epoch": 3.1649080917853194, "grad_norm": 0.06796961202722816, "learning_rate": 6.297423894427968e-05, "loss": 0.845, "step": 180270 }, { "epoch": 3.165083656665321, "grad_norm": 0.07201925640858722, "learning_rate": 6.296395035741741e-05, "loss": 0.8359, "step": 180280 }, { "epoch": 3.165259221545322, "grad_norm": 0.0663537025585738, "learning_rate": 6.295366225247911e-05, "loss": 0.8408, "step": 180290 }, { "epoch": 3.1654347864253234, "grad_norm": 0.04644661502187092, "learning_rate": 6.294337462959526e-05, "loss": 0.8324, "step": 180300 }, { "epoch": 3.165610351305325, "grad_norm": 0.0479318049620793, "learning_rate": 6.293308748889618e-05, "loss": 0.8423, "step": 180310 }, { "epoch": 3.1657859161853263, "grad_norm": 0.07049427839563434, "learning_rate": 6.292280083051213e-05, "loss": 0.8507, "step": 180320 }, { "epoch": 3.165961481065328, "grad_norm": 0.06740242628621466, "learning_rate": 6.291251465457347e-05, "loss": 0.8411, "step": 180330 }, { "epoch": 3.1661370459453293, "grad_norm": 0.07570005204121985, "learning_rate": 6.290222896121058e-05, "loss": 0.8443, "step": 180340 }, { "epoch": 3.1663126108253303, "grad_norm": 0.07691770931878841, "learning_rate": 6.289194375055374e-05, "loss": 0.8544, "step": 180350 }, { "epoch": 3.166488175705332, "grad_norm": 0.04996462582109361, "learning_rate": 6.288165902273329e-05, "loss": 0.8438, "step": 180360 }, { "epoch": 3.1666637405853333, "grad_norm": 0.055989922568162265, "learning_rate": 6.287137477787951e-05, "loss": 0.8448, "step": 180370 }, { "epoch": 3.1668393054653348, "grad_norm": 0.05638742606009302, "learning_rate": 6.286109101612273e-05, "loss": 0.8411, "step": 180380 }, { "epoch": 3.1670148703453362, "grad_norm": 0.08108993076959144, "learning_rate": 6.28508077375932e-05, "loss": 0.8451, "step": 180390 }, { "epoch": 3.1671904352253377, "grad_norm": 0.054956135059987865, "learning_rate": 6.284052494242128e-05, "loss": 0.8353, "step": 180400 }, { "epoch": 3.1673660001053388, "grad_norm": 0.04881250876449361, "learning_rate": 6.283024263073723e-05, "loss": 0.8416, "step": 180410 }, { "epoch": 3.1675415649853402, "grad_norm": 0.07126810369491665, "learning_rate": 6.281996080267132e-05, "loss": 0.8399, "step": 180420 }, { "epoch": 3.1677171298653417, "grad_norm": 0.04225393113233018, "learning_rate": 6.28096794583538e-05, "loss": 0.8324, "step": 180430 }, { "epoch": 3.167892694745343, "grad_norm": 0.06385307221471702, "learning_rate": 6.279939859791499e-05, "loss": 0.8367, "step": 180440 }, { "epoch": 3.1680682596253447, "grad_norm": 0.05229878924282728, "learning_rate": 6.278911822148512e-05, "loss": 0.8378, "step": 180450 }, { "epoch": 3.168243824505346, "grad_norm": 0.057668196536324816, "learning_rate": 6.277883832919438e-05, "loss": 0.845, "step": 180460 }, { "epoch": 3.168419389385347, "grad_norm": 0.0630826884096026, "learning_rate": 6.276855892117316e-05, "loss": 0.8366, "step": 180470 }, { "epoch": 3.1685949542653487, "grad_norm": 0.061361750328243723, "learning_rate": 6.275827999755162e-05, "loss": 0.8399, "step": 180480 }, { "epoch": 3.16877051914535, "grad_norm": 0.05308782781445419, "learning_rate": 6.274800155845999e-05, "loss": 0.8431, "step": 180490 }, { "epoch": 3.1689460840253516, "grad_norm": 0.04900390663349413, "learning_rate": 6.273772360402848e-05, "loss": 0.8338, "step": 180500 }, { "epoch": 3.169121648905353, "grad_norm": 0.0551037370330427, "learning_rate": 6.272744613438737e-05, "loss": 0.8502, "step": 180510 }, { "epoch": 3.1692972137853546, "grad_norm": 0.04388734707879429, "learning_rate": 6.271716914966685e-05, "loss": 0.8461, "step": 180520 }, { "epoch": 3.1694727786653556, "grad_norm": 0.04382186446396339, "learning_rate": 6.270689264999711e-05, "loss": 0.8473, "step": 180530 }, { "epoch": 3.169648343545357, "grad_norm": 0.05664815920304455, "learning_rate": 6.269661663550843e-05, "loss": 0.8491, "step": 180540 }, { "epoch": 3.1698239084253585, "grad_norm": 0.04288222525525371, "learning_rate": 6.268634110633094e-05, "loss": 0.8424, "step": 180550 }, { "epoch": 3.16999947330536, "grad_norm": 0.046780190313187, "learning_rate": 6.26760660625948e-05, "loss": 0.842, "step": 180560 }, { "epoch": 3.1701750381853615, "grad_norm": 0.07281545254013368, "learning_rate": 6.26657915044303e-05, "loss": 0.8398, "step": 180570 }, { "epoch": 3.170350603065363, "grad_norm": 0.07344323919408062, "learning_rate": 6.265551743196756e-05, "loss": 0.8369, "step": 180580 }, { "epoch": 3.170526167945364, "grad_norm": 0.05869499282737324, "learning_rate": 6.264524384533678e-05, "loss": 0.8351, "step": 180590 }, { "epoch": 3.1707017328253655, "grad_norm": 0.06416323156103834, "learning_rate": 6.263497074466806e-05, "loss": 0.8417, "step": 180600 }, { "epoch": 3.170877297705367, "grad_norm": 0.06510864350223267, "learning_rate": 6.262469813009166e-05, "loss": 0.8398, "step": 180610 }, { "epoch": 3.1710528625853684, "grad_norm": 0.07189241475950726, "learning_rate": 6.261442600173769e-05, "loss": 0.8489, "step": 180620 }, { "epoch": 3.17122842746537, "grad_norm": 0.04973017486028625, "learning_rate": 6.260415435973624e-05, "loss": 0.849, "step": 180630 }, { "epoch": 3.1714039923453714, "grad_norm": 0.06581163487127424, "learning_rate": 6.259388320421757e-05, "loss": 0.8445, "step": 180640 }, { "epoch": 3.1715795572253724, "grad_norm": 0.04687444186734894, "learning_rate": 6.258361253531177e-05, "loss": 0.8456, "step": 180650 }, { "epoch": 3.171755122105374, "grad_norm": 0.044858954594748736, "learning_rate": 6.257334235314895e-05, "loss": 0.8408, "step": 180660 }, { "epoch": 3.1719306869853754, "grad_norm": 0.04133317007295835, "learning_rate": 6.25630726578592e-05, "loss": 0.8467, "step": 180670 }, { "epoch": 3.172106251865377, "grad_norm": 0.054479477197170736, "learning_rate": 6.255280344957274e-05, "loss": 0.8385, "step": 180680 }, { "epoch": 3.1722818167453783, "grad_norm": 0.047565039899928184, "learning_rate": 6.254253472841961e-05, "loss": 0.8467, "step": 180690 }, { "epoch": 3.17245738162538, "grad_norm": 0.05435781419475874, "learning_rate": 6.253226649452992e-05, "loss": 0.8383, "step": 180700 }, { "epoch": 3.172632946505381, "grad_norm": 0.06244136455851963, "learning_rate": 6.25219987480338e-05, "loss": 0.8414, "step": 180710 }, { "epoch": 3.1728085113853823, "grad_norm": 0.04418778103169791, "learning_rate": 6.251173148906135e-05, "loss": 0.8432, "step": 180720 }, { "epoch": 3.172984076265384, "grad_norm": 0.06006616949020596, "learning_rate": 6.250146471774258e-05, "loss": 0.8401, "step": 180730 }, { "epoch": 3.1731596411453853, "grad_norm": 0.07924975495024111, "learning_rate": 6.249119843420767e-05, "loss": 0.8335, "step": 180740 }, { "epoch": 3.1733352060253868, "grad_norm": 0.07942122586671568, "learning_rate": 6.248093263858668e-05, "loss": 0.8433, "step": 180750 }, { "epoch": 3.1735107709053882, "grad_norm": 0.053563114130436414, "learning_rate": 6.247066733100963e-05, "loss": 0.8394, "step": 180760 }, { "epoch": 3.1736863357853897, "grad_norm": 0.054344173755844036, "learning_rate": 6.24604025116066e-05, "loss": 0.8444, "step": 180770 }, { "epoch": 3.1738619006653908, "grad_norm": 0.043487309515126554, "learning_rate": 6.245013818050767e-05, "loss": 0.8348, "step": 180780 }, { "epoch": 3.1740374655453922, "grad_norm": 0.05650546979678455, "learning_rate": 6.243987433784289e-05, "loss": 0.8438, "step": 180790 }, { "epoch": 3.1742130304253937, "grad_norm": 0.055122870902469964, "learning_rate": 6.242961098374223e-05, "loss": 0.8379, "step": 180800 }, { "epoch": 3.174388595305395, "grad_norm": 0.053771277268420216, "learning_rate": 6.241934811833585e-05, "loss": 0.8435, "step": 180810 }, { "epoch": 3.1745641601853967, "grad_norm": 0.05363921449183982, "learning_rate": 6.240908574175375e-05, "loss": 0.8409, "step": 180820 }, { "epoch": 3.1747397250653977, "grad_norm": 0.04313226840719622, "learning_rate": 6.239882385412589e-05, "loss": 0.8418, "step": 180830 }, { "epoch": 3.174915289945399, "grad_norm": 0.06616985742963215, "learning_rate": 6.238856245558229e-05, "loss": 0.8451, "step": 180840 }, { "epoch": 3.1750908548254007, "grad_norm": 0.048913073128427784, "learning_rate": 6.237830154625305e-05, "loss": 0.8416, "step": 180850 }, { "epoch": 3.175266419705402, "grad_norm": 0.056263092251537146, "learning_rate": 6.236804112626813e-05, "loss": 0.8447, "step": 180860 }, { "epoch": 3.1754419845854036, "grad_norm": 0.053500813372265095, "learning_rate": 6.235778119575752e-05, "loss": 0.8439, "step": 180870 }, { "epoch": 3.175617549465405, "grad_norm": 0.052779180041148975, "learning_rate": 6.234752175485123e-05, "loss": 0.8414, "step": 180880 }, { "epoch": 3.1757931143454066, "grad_norm": 0.050666317095893025, "learning_rate": 6.233726280367927e-05, "loss": 0.8447, "step": 180890 }, { "epoch": 3.1759686792254076, "grad_norm": 0.06156017355581826, "learning_rate": 6.232700434237156e-05, "loss": 0.8498, "step": 180900 }, { "epoch": 3.176144244105409, "grad_norm": 0.05133234558526922, "learning_rate": 6.231674637105814e-05, "loss": 0.8377, "step": 180910 }, { "epoch": 3.1763198089854106, "grad_norm": 0.06374287130573988, "learning_rate": 6.230648888986896e-05, "loss": 0.8386, "step": 180920 }, { "epoch": 3.176495373865412, "grad_norm": 0.06778422070349258, "learning_rate": 6.229623189893399e-05, "loss": 0.8408, "step": 180930 }, { "epoch": 3.1766709387454135, "grad_norm": 0.05135380738206118, "learning_rate": 6.228597539838315e-05, "loss": 0.8416, "step": 180940 }, { "epoch": 3.1768465036254145, "grad_norm": 0.07703661209523938, "learning_rate": 6.227571938834646e-05, "loss": 0.8443, "step": 180950 }, { "epoch": 3.177022068505416, "grad_norm": 0.07470155358029452, "learning_rate": 6.22654638689538e-05, "loss": 0.8431, "step": 180960 }, { "epoch": 3.1771976333854175, "grad_norm": 0.06262835823580958, "learning_rate": 6.225520884033509e-05, "loss": 0.834, "step": 180970 }, { "epoch": 3.177373198265419, "grad_norm": 0.05073348026579183, "learning_rate": 6.224495430262039e-05, "loss": 0.8414, "step": 180980 }, { "epoch": 3.1775487631454205, "grad_norm": 0.05446580382597247, "learning_rate": 6.223470025593953e-05, "loss": 0.8457, "step": 180990 }, { "epoch": 3.177724328025422, "grad_norm": 0.058848903961272146, "learning_rate": 6.222444670042244e-05, "loss": 0.8454, "step": 181000 }, { "epoch": 3.1778998929054234, "grad_norm": 0.054175683436700726, "learning_rate": 6.221419363619902e-05, "loss": 0.8434, "step": 181010 }, { "epoch": 3.1780754577854244, "grad_norm": 0.05936829474541057, "learning_rate": 6.22039410633992e-05, "loss": 0.839, "step": 181020 }, { "epoch": 3.178251022665426, "grad_norm": 0.055813825163259966, "learning_rate": 6.219368898215291e-05, "loss": 0.8453, "step": 181030 }, { "epoch": 3.1784265875454274, "grad_norm": 0.08023453835282786, "learning_rate": 6.218343739259e-05, "loss": 0.842, "step": 181040 }, { "epoch": 3.178602152425429, "grad_norm": 0.05468409611556574, "learning_rate": 6.21731862948404e-05, "loss": 0.8421, "step": 181050 }, { "epoch": 3.1787777173054304, "grad_norm": 0.06196587042531229, "learning_rate": 6.216293568903395e-05, "loss": 0.8411, "step": 181060 }, { "epoch": 3.178953282185432, "grad_norm": 0.05076389099872235, "learning_rate": 6.215268557530052e-05, "loss": 0.8376, "step": 181070 }, { "epoch": 3.179128847065433, "grad_norm": 0.059748366706869935, "learning_rate": 6.214243595377003e-05, "loss": 0.8509, "step": 181080 }, { "epoch": 3.1793044119454343, "grad_norm": 0.07886641774978191, "learning_rate": 6.213218682457235e-05, "loss": 0.8377, "step": 181090 }, { "epoch": 3.179479976825436, "grad_norm": 0.05199676916923831, "learning_rate": 6.21219381878373e-05, "loss": 0.845, "step": 181100 }, { "epoch": 3.1796555417054373, "grad_norm": 0.04875496093009314, "learning_rate": 6.211169004369474e-05, "loss": 0.8428, "step": 181110 }, { "epoch": 3.1798311065854388, "grad_norm": 0.05649105908492239, "learning_rate": 6.210144239227451e-05, "loss": 0.8413, "step": 181120 }, { "epoch": 3.1800066714654402, "grad_norm": 0.050538494709163204, "learning_rate": 6.209119523370648e-05, "loss": 0.8395, "step": 181130 }, { "epoch": 3.1801822363454413, "grad_norm": 0.05918713127219318, "learning_rate": 6.208094856812043e-05, "loss": 0.8414, "step": 181140 }, { "epoch": 3.1803578012254428, "grad_norm": 0.06255356176161767, "learning_rate": 6.207070239564625e-05, "loss": 0.8329, "step": 181150 }, { "epoch": 3.1805333661054442, "grad_norm": 0.043275783748467925, "learning_rate": 6.206045671641374e-05, "loss": 0.8514, "step": 181160 }, { "epoch": 3.1807089309854457, "grad_norm": 0.06918787025699287, "learning_rate": 6.20502115305527e-05, "loss": 0.8384, "step": 181170 }, { "epoch": 3.180884495865447, "grad_norm": 0.05777847693186216, "learning_rate": 6.20399668381929e-05, "loss": 0.8488, "step": 181180 }, { "epoch": 3.1810600607454487, "grad_norm": 0.05727158584837803, "learning_rate": 6.202972263946423e-05, "loss": 0.8436, "step": 181190 }, { "epoch": 3.1812356256254497, "grad_norm": 0.058678683683133115, "learning_rate": 6.201947893449644e-05, "loss": 0.8524, "step": 181200 }, { "epoch": 3.181411190505451, "grad_norm": 0.042658535986953315, "learning_rate": 6.20092357234193e-05, "loss": 0.8379, "step": 181210 }, { "epoch": 3.1815867553854527, "grad_norm": 0.0634597026273684, "learning_rate": 6.199899300636266e-05, "loss": 0.8485, "step": 181220 }, { "epoch": 3.181762320265454, "grad_norm": 0.04315030626162649, "learning_rate": 6.198875078345621e-05, "loss": 0.844, "step": 181230 }, { "epoch": 3.1819378851454556, "grad_norm": 0.05728204171799419, "learning_rate": 6.197850905482976e-05, "loss": 0.8391, "step": 181240 }, { "epoch": 3.182113450025457, "grad_norm": 0.05797562567173369, "learning_rate": 6.196826782061309e-05, "loss": 0.8397, "step": 181250 }, { "epoch": 3.182289014905458, "grad_norm": 0.04276410301245393, "learning_rate": 6.195802708093598e-05, "loss": 0.84, "step": 181260 }, { "epoch": 3.1824645797854596, "grad_norm": 0.0788013561071987, "learning_rate": 6.194778683592814e-05, "loss": 0.8392, "step": 181270 }, { "epoch": 3.182640144665461, "grad_norm": 0.04969994692136077, "learning_rate": 6.193754708571928e-05, "loss": 0.8401, "step": 181280 }, { "epoch": 3.1828157095454626, "grad_norm": 0.05729152470400016, "learning_rate": 6.192730783043923e-05, "loss": 0.8465, "step": 181290 }, { "epoch": 3.182991274425464, "grad_norm": 0.05033735311692828, "learning_rate": 6.191706907021765e-05, "loss": 0.8374, "step": 181300 }, { "epoch": 3.1831668393054655, "grad_norm": 0.04839769019867525, "learning_rate": 6.190683080518428e-05, "loss": 0.8459, "step": 181310 }, { "epoch": 3.1833424041854665, "grad_norm": 0.054295448713569824, "learning_rate": 6.18965930354689e-05, "loss": 0.8326, "step": 181320 }, { "epoch": 3.183517969065468, "grad_norm": 0.05677980282480508, "learning_rate": 6.18863557612012e-05, "loss": 0.8384, "step": 181330 }, { "epoch": 3.1836935339454695, "grad_norm": 0.041700215182891444, "learning_rate": 6.187611898251082e-05, "loss": 0.838, "step": 181340 }, { "epoch": 3.183869098825471, "grad_norm": 0.04233545720250186, "learning_rate": 6.18658826995275e-05, "loss": 0.8398, "step": 181350 }, { "epoch": 3.1840446637054725, "grad_norm": 0.06818250642331745, "learning_rate": 6.185564691238099e-05, "loss": 0.8424, "step": 181360 }, { "epoch": 3.184220228585474, "grad_norm": 0.043878073553094706, "learning_rate": 6.184541162120091e-05, "loss": 0.8532, "step": 181370 }, { "epoch": 3.184395793465475, "grad_norm": 0.052177287958350034, "learning_rate": 6.183517682611696e-05, "loss": 0.8512, "step": 181380 }, { "epoch": 3.1845713583454764, "grad_norm": 0.04728269916327154, "learning_rate": 6.182494252725885e-05, "loss": 0.8427, "step": 181390 }, { "epoch": 3.184746923225478, "grad_norm": 0.051861131145414414, "learning_rate": 6.181470872475622e-05, "loss": 0.8382, "step": 181400 }, { "epoch": 3.1849224881054794, "grad_norm": 0.05786377192256197, "learning_rate": 6.18044754187387e-05, "loss": 0.8425, "step": 181410 }, { "epoch": 3.185098052985481, "grad_norm": 0.044645338628613966, "learning_rate": 6.179424260933603e-05, "loss": 0.8362, "step": 181420 }, { "epoch": 3.1852736178654824, "grad_norm": 0.04759075492485572, "learning_rate": 6.178401029667783e-05, "loss": 0.8363, "step": 181430 }, { "epoch": 3.1854491827454834, "grad_norm": 0.0463291058293646, "learning_rate": 6.177377848089371e-05, "loss": 0.8387, "step": 181440 }, { "epoch": 3.185624747625485, "grad_norm": 0.04447494132091603, "learning_rate": 6.176354716211335e-05, "loss": 0.841, "step": 181450 }, { "epoch": 3.1858003125054863, "grad_norm": 0.041093321443565076, "learning_rate": 6.175331634046636e-05, "loss": 0.8432, "step": 181460 }, { "epoch": 3.185975877385488, "grad_norm": 0.05668744216104256, "learning_rate": 6.174308601608238e-05, "loss": 0.8395, "step": 181470 }, { "epoch": 3.1861514422654893, "grad_norm": 0.05554044271270587, "learning_rate": 6.173285618909098e-05, "loss": 0.8467, "step": 181480 }, { "epoch": 3.1863270071454908, "grad_norm": 0.04530003860336921, "learning_rate": 6.172262685962187e-05, "loss": 0.8486, "step": 181490 }, { "epoch": 3.186502572025492, "grad_norm": 0.05836989267378719, "learning_rate": 6.171239802780461e-05, "loss": 0.8354, "step": 181500 }, { "epoch": 3.1866781369054933, "grad_norm": 0.05616014028791638, "learning_rate": 6.17021696937688e-05, "loss": 0.8403, "step": 181510 }, { "epoch": 3.1868537017854948, "grad_norm": 0.060532107929449214, "learning_rate": 6.169194185764398e-05, "loss": 0.8414, "step": 181520 }, { "epoch": 3.1870292666654962, "grad_norm": 0.037465652310939956, "learning_rate": 6.16817145195598e-05, "loss": 0.8509, "step": 181530 }, { "epoch": 3.1872048315454977, "grad_norm": 0.05799962915630295, "learning_rate": 6.167148767964588e-05, "loss": 0.8458, "step": 181540 }, { "epoch": 3.187380396425499, "grad_norm": 0.05883798270195268, "learning_rate": 6.166126133803169e-05, "loss": 0.8455, "step": 181550 }, { "epoch": 3.1875559613055007, "grad_norm": 0.04752194771728433, "learning_rate": 6.16510354948469e-05, "loss": 0.8483, "step": 181560 }, { "epoch": 3.1877315261855017, "grad_norm": 0.06528562022606547, "learning_rate": 6.164081015022102e-05, "loss": 0.8459, "step": 181570 }, { "epoch": 3.187907091065503, "grad_norm": 0.051404185889496634, "learning_rate": 6.16305853042836e-05, "loss": 0.8324, "step": 181580 }, { "epoch": 3.1880826559455047, "grad_norm": 0.048897593165146296, "learning_rate": 6.162036095716423e-05, "loss": 0.8305, "step": 181590 }, { "epoch": 3.188258220825506, "grad_norm": 0.05865969460277963, "learning_rate": 6.161013710899244e-05, "loss": 0.8414, "step": 181600 }, { "epoch": 3.1884337857055076, "grad_norm": 0.05105169867169069, "learning_rate": 6.159991375989776e-05, "loss": 0.8453, "step": 181610 }, { "epoch": 3.1886093505855087, "grad_norm": 0.04476004825346111, "learning_rate": 6.158969091000969e-05, "loss": 0.8432, "step": 181620 }, { "epoch": 3.18878491546551, "grad_norm": 0.07446555843963132, "learning_rate": 6.157946855945784e-05, "loss": 0.8353, "step": 181630 }, { "epoch": 3.1889604803455116, "grad_norm": 0.05236043385505729, "learning_rate": 6.156924670837165e-05, "loss": 0.8483, "step": 181640 }, { "epoch": 3.189136045225513, "grad_norm": 0.05425391370297193, "learning_rate": 6.155902535688064e-05, "loss": 0.8458, "step": 181650 }, { "epoch": 3.1893116101055146, "grad_norm": 0.04968178553945077, "learning_rate": 6.154880450511436e-05, "loss": 0.8393, "step": 181660 }, { "epoch": 3.189487174985516, "grad_norm": 0.045683852443946155, "learning_rate": 6.153858415320231e-05, "loss": 0.844, "step": 181670 }, { "epoch": 3.1896627398655175, "grad_norm": 0.05606401016187325, "learning_rate": 6.152836430127396e-05, "loss": 0.8391, "step": 181680 }, { "epoch": 3.1898383047455185, "grad_norm": 0.06248938447817715, "learning_rate": 6.151814494945873e-05, "loss": 0.8343, "step": 181690 }, { "epoch": 3.19001386962552, "grad_norm": 0.061730276489407224, "learning_rate": 6.150792609788624e-05, "loss": 0.8373, "step": 181700 }, { "epoch": 3.1901894345055215, "grad_norm": 0.04862548794016038, "learning_rate": 6.149770774668588e-05, "loss": 0.8503, "step": 181710 }, { "epoch": 3.190364999385523, "grad_norm": 0.049580303604039526, "learning_rate": 6.148748989598712e-05, "loss": 0.8367, "step": 181720 }, { "epoch": 3.1905405642655245, "grad_norm": 0.050588146382512, "learning_rate": 6.147727254591945e-05, "loss": 0.8405, "step": 181730 }, { "epoch": 3.190716129145526, "grad_norm": 0.05617221704408805, "learning_rate": 6.146705569661232e-05, "loss": 0.8527, "step": 181740 }, { "epoch": 3.190891694025527, "grad_norm": 0.05029204643751995, "learning_rate": 6.145683934819512e-05, "loss": 0.8413, "step": 181750 }, { "epoch": 3.1910672589055284, "grad_norm": 0.05423673687936631, "learning_rate": 6.144662350079737e-05, "loss": 0.8413, "step": 181760 }, { "epoch": 3.19124282378553, "grad_norm": 0.049820229493160506, "learning_rate": 6.143640815454852e-05, "loss": 0.8421, "step": 181770 }, { "epoch": 3.1914183886655314, "grad_norm": 0.052359303424463, "learning_rate": 6.142619330957793e-05, "loss": 0.8387, "step": 181780 }, { "epoch": 3.191593953545533, "grad_norm": 0.05775085976374844, "learning_rate": 6.141597896601506e-05, "loss": 0.8401, "step": 181790 }, { "epoch": 3.1917695184255344, "grad_norm": 0.04730359820484511, "learning_rate": 6.140576512398931e-05, "loss": 0.8343, "step": 181800 }, { "epoch": 3.1919450833055354, "grad_norm": 0.04154236867709806, "learning_rate": 6.139555178363012e-05, "loss": 0.8425, "step": 181810 }, { "epoch": 3.192120648185537, "grad_norm": 0.05140909661338718, "learning_rate": 6.138533894506683e-05, "loss": 0.8409, "step": 181820 }, { "epoch": 3.1922962130655383, "grad_norm": 0.05919962069762185, "learning_rate": 6.137512660842894e-05, "loss": 0.8393, "step": 181830 }, { "epoch": 3.19247177794554, "grad_norm": 0.05825087938017819, "learning_rate": 6.136491477384582e-05, "loss": 0.8432, "step": 181840 }, { "epoch": 3.1926473428255413, "grad_norm": 0.06781563772196901, "learning_rate": 6.13547034414468e-05, "loss": 0.8372, "step": 181850 }, { "epoch": 3.192822907705543, "grad_norm": 0.04596141614422917, "learning_rate": 6.134449261136122e-05, "loss": 0.8446, "step": 181860 }, { "epoch": 3.192998472585544, "grad_norm": 0.05642820560148716, "learning_rate": 6.13342822837186e-05, "loss": 0.8388, "step": 181870 }, { "epoch": 3.1931740374655453, "grad_norm": 0.05614329572333585, "learning_rate": 6.132407245864818e-05, "loss": 0.8417, "step": 181880 }, { "epoch": 3.1933496023455468, "grad_norm": 0.05405060808639001, "learning_rate": 6.131386313627938e-05, "loss": 0.8404, "step": 181890 }, { "epoch": 3.1935251672255482, "grad_norm": 0.05970005965031111, "learning_rate": 6.130365431674154e-05, "loss": 0.8456, "step": 181900 }, { "epoch": 3.1937007321055497, "grad_norm": 0.05468746206602166, "learning_rate": 6.129344600016402e-05, "loss": 0.8384, "step": 181910 }, { "epoch": 3.193876296985551, "grad_norm": 0.053214234079784664, "learning_rate": 6.12832381866761e-05, "loss": 0.8353, "step": 181920 }, { "epoch": 3.1940518618655522, "grad_norm": 0.04791409653456163, "learning_rate": 6.12730308764072e-05, "loss": 0.8357, "step": 181930 }, { "epoch": 3.1942274267455537, "grad_norm": 0.0488873991121105, "learning_rate": 6.126282406948664e-05, "loss": 0.8425, "step": 181940 }, { "epoch": 3.194402991625555, "grad_norm": 0.0666228415934281, "learning_rate": 6.12526177660437e-05, "loss": 0.8385, "step": 181950 }, { "epoch": 3.1945785565055567, "grad_norm": 0.05396864517891219, "learning_rate": 6.124241196620768e-05, "loss": 0.8304, "step": 181960 }, { "epoch": 3.194754121385558, "grad_norm": 0.06881622362483647, "learning_rate": 6.123220667010793e-05, "loss": 0.8452, "step": 181970 }, { "epoch": 3.1949296862655596, "grad_norm": 0.05014823767366758, "learning_rate": 6.122200187787377e-05, "loss": 0.8393, "step": 181980 }, { "epoch": 3.1951052511455607, "grad_norm": 0.054208582113349175, "learning_rate": 6.121179758963443e-05, "loss": 0.8389, "step": 181990 }, { "epoch": 3.195280816025562, "grad_norm": 0.05154122155542282, "learning_rate": 6.120159380551927e-05, "loss": 0.8475, "step": 182000 }, { "epoch": 3.1954563809055636, "grad_norm": 0.05733507048766492, "learning_rate": 6.119139052565754e-05, "loss": 0.8359, "step": 182010 }, { "epoch": 3.195631945785565, "grad_norm": 0.05079278996951382, "learning_rate": 6.118118775017853e-05, "loss": 0.8424, "step": 182020 }, { "epoch": 3.1958075106655666, "grad_norm": 0.05151582199218898, "learning_rate": 6.117098547921147e-05, "loss": 0.8391, "step": 182030 }, { "epoch": 3.195983075545568, "grad_norm": 0.04541303447347507, "learning_rate": 6.116078371288568e-05, "loss": 0.842, "step": 182040 }, { "epoch": 3.196158640425569, "grad_norm": 0.04908470278098667, "learning_rate": 6.11505824513304e-05, "loss": 0.8345, "step": 182050 }, { "epoch": 3.1963342053055706, "grad_norm": 0.047887485280586034, "learning_rate": 6.114038169467485e-05, "loss": 0.8389, "step": 182060 }, { "epoch": 3.196509770185572, "grad_norm": 0.042305080428833276, "learning_rate": 6.113018144304832e-05, "loss": 0.8412, "step": 182070 }, { "epoch": 3.1966853350655735, "grad_norm": 0.07301964348346865, "learning_rate": 6.111998169658005e-05, "loss": 0.841, "step": 182080 }, { "epoch": 3.196860899945575, "grad_norm": 0.044341564099918664, "learning_rate": 6.110978245539921e-05, "loss": 0.8439, "step": 182090 }, { "epoch": 3.1970364648255765, "grad_norm": 0.05649070817801077, "learning_rate": 6.10995837196351e-05, "loss": 0.8367, "step": 182100 }, { "epoch": 3.1972120297055775, "grad_norm": 0.066969187609454, "learning_rate": 6.108938548941691e-05, "loss": 0.8404, "step": 182110 }, { "epoch": 3.197387594585579, "grad_norm": 0.051306924391157895, "learning_rate": 6.107918776487388e-05, "loss": 0.8441, "step": 182120 }, { "epoch": 3.1975631594655805, "grad_norm": 0.053459785027181704, "learning_rate": 6.106899054613515e-05, "loss": 0.8354, "step": 182130 }, { "epoch": 3.197738724345582, "grad_norm": 0.05463038000282751, "learning_rate": 6.105879383332999e-05, "loss": 0.8444, "step": 182140 }, { "epoch": 3.1979142892255834, "grad_norm": 0.059790797376772016, "learning_rate": 6.104859762658757e-05, "loss": 0.8449, "step": 182150 }, { "epoch": 3.198089854105585, "grad_norm": 0.057597385023622896, "learning_rate": 6.103840192603702e-05, "loss": 0.8516, "step": 182160 }, { "epoch": 3.198265418985586, "grad_norm": 0.06062357707518469, "learning_rate": 6.1028206731807636e-05, "loss": 0.8366, "step": 182170 }, { "epoch": 3.1984409838655874, "grad_norm": 0.04586346898616864, "learning_rate": 6.101801204402854e-05, "loss": 0.8403, "step": 182180 }, { "epoch": 3.198616548745589, "grad_norm": 0.0652882973306885, "learning_rate": 6.1007817862828874e-05, "loss": 0.8356, "step": 182190 }, { "epoch": 3.1987921136255903, "grad_norm": 0.062079736957871695, "learning_rate": 6.099762418833779e-05, "loss": 0.8319, "step": 182200 }, { "epoch": 3.198967678505592, "grad_norm": 0.05322169419431621, "learning_rate": 6.098743102068451e-05, "loss": 0.8438, "step": 182210 }, { "epoch": 3.1991432433855933, "grad_norm": 0.0499560578471252, "learning_rate": 6.097723835999814e-05, "loss": 0.8446, "step": 182220 }, { "epoch": 3.199318808265595, "grad_norm": 0.053074478267723726, "learning_rate": 6.096704620640781e-05, "loss": 0.8488, "step": 182230 }, { "epoch": 3.199494373145596, "grad_norm": 0.047905661519592434, "learning_rate": 6.09568545600427e-05, "loss": 0.85, "step": 182240 }, { "epoch": 3.1996699380255973, "grad_norm": 0.05866739886760045, "learning_rate": 6.0946663421031925e-05, "loss": 0.8384, "step": 182250 }, { "epoch": 3.1998455029055988, "grad_norm": 0.04776624895116065, "learning_rate": 6.093647278950455e-05, "loss": 0.8468, "step": 182260 }, { "epoch": 3.2000210677856002, "grad_norm": 0.0611431658065939, "learning_rate": 6.0926282665589786e-05, "loss": 0.8325, "step": 182270 }, { "epoch": 3.2001966326656017, "grad_norm": 0.06563225729700933, "learning_rate": 6.09160930494167e-05, "loss": 0.8393, "step": 182280 }, { "epoch": 3.2003721975456028, "grad_norm": 0.053072022903428044, "learning_rate": 6.09059039411144e-05, "loss": 0.8401, "step": 182290 }, { "epoch": 3.2005477624256042, "grad_norm": 0.060454762034126905, "learning_rate": 6.0895715340811955e-05, "loss": 0.8404, "step": 182300 }, { "epoch": 3.2007233273056057, "grad_norm": 0.05975433565106528, "learning_rate": 6.088552724863852e-05, "loss": 0.8335, "step": 182310 }, { "epoch": 3.200898892185607, "grad_norm": 0.06521592761429809, "learning_rate": 6.087533966472312e-05, "loss": 0.8357, "step": 182320 }, { "epoch": 3.2010744570656087, "grad_norm": 0.05406160841041899, "learning_rate": 6.086515258919483e-05, "loss": 0.8369, "step": 182330 }, { "epoch": 3.20125002194561, "grad_norm": 0.05153329193579625, "learning_rate": 6.085496602218278e-05, "loss": 0.8466, "step": 182340 }, { "epoch": 3.2014255868256116, "grad_norm": 0.04938554590637465, "learning_rate": 6.0844779963816036e-05, "loss": 0.8424, "step": 182350 }, { "epoch": 3.2016011517056127, "grad_norm": 0.09072078701705143, "learning_rate": 6.0834594414223586e-05, "loss": 0.8491, "step": 182360 }, { "epoch": 3.201776716585614, "grad_norm": 0.04972151028487942, "learning_rate": 6.0824409373534494e-05, "loss": 0.8461, "step": 182370 }, { "epoch": 3.2019522814656156, "grad_norm": 0.06257295192642903, "learning_rate": 6.081422484187789e-05, "loss": 0.8404, "step": 182380 }, { "epoch": 3.202127846345617, "grad_norm": 0.060216047596058614, "learning_rate": 6.0804040819382734e-05, "loss": 0.8373, "step": 182390 }, { "epoch": 3.2023034112256186, "grad_norm": 0.05595882019254314, "learning_rate": 6.079385730617808e-05, "loss": 0.852, "step": 182400 }, { "epoch": 3.2024789761056196, "grad_norm": 0.0538954341546527, "learning_rate": 6.0783674302392965e-05, "loss": 0.8438, "step": 182410 }, { "epoch": 3.202654540985621, "grad_norm": 0.06123958631801769, "learning_rate": 6.0773491808156424e-05, "loss": 0.8398, "step": 182420 }, { "epoch": 3.2028301058656226, "grad_norm": 0.047406358809918454, "learning_rate": 6.07633098235974e-05, "loss": 0.8404, "step": 182430 }, { "epoch": 3.203005670745624, "grad_norm": 0.05820301410760285, "learning_rate": 6.075312834884499e-05, "loss": 0.8436, "step": 182440 }, { "epoch": 3.2031812356256255, "grad_norm": 0.06579987289206528, "learning_rate": 6.074294738402815e-05, "loss": 0.84, "step": 182450 }, { "epoch": 3.203356800505627, "grad_norm": 0.05881335184028285, "learning_rate": 6.0732766929275905e-05, "loss": 0.8403, "step": 182460 }, { "epoch": 3.2035323653856285, "grad_norm": 0.05475786496535445, "learning_rate": 6.072258698471718e-05, "loss": 0.8399, "step": 182470 }, { "epoch": 3.2037079302656295, "grad_norm": 0.06964063776722025, "learning_rate": 6.071240755048102e-05, "loss": 0.8377, "step": 182480 }, { "epoch": 3.203883495145631, "grad_norm": 0.043912630640128826, "learning_rate": 6.070222862669639e-05, "loss": 0.8394, "step": 182490 }, { "epoch": 3.2040590600256325, "grad_norm": 0.043630814570068434, "learning_rate": 6.06920502134922e-05, "loss": 0.8443, "step": 182500 }, { "epoch": 3.204234624905634, "grad_norm": 0.05676365890298951, "learning_rate": 6.068187231099749e-05, "loss": 0.841, "step": 182510 }, { "epoch": 3.2044101897856354, "grad_norm": 0.0524725399273576, "learning_rate": 6.067169491934122e-05, "loss": 0.8323, "step": 182520 }, { "epoch": 3.204585754665637, "grad_norm": 0.05903638234577931, "learning_rate": 6.0661518038652266e-05, "loss": 0.8402, "step": 182530 }, { "epoch": 3.204761319545638, "grad_norm": 0.049588624321469314, "learning_rate": 6.065134166905958e-05, "loss": 0.8413, "step": 182540 }, { "epoch": 3.2049368844256394, "grad_norm": 0.05972539313008733, "learning_rate": 6.064116581069216e-05, "loss": 0.8393, "step": 182550 }, { "epoch": 3.205112449305641, "grad_norm": 0.05256699013401026, "learning_rate": 6.06309904636789e-05, "loss": 0.8408, "step": 182560 }, { "epoch": 3.2052880141856424, "grad_norm": 0.052990688414644445, "learning_rate": 6.062081562814873e-05, "loss": 0.8377, "step": 182570 }, { "epoch": 3.205463579065644, "grad_norm": 0.05338790982220299, "learning_rate": 6.061064130423056e-05, "loss": 0.8476, "step": 182580 }, { "epoch": 3.2056391439456453, "grad_norm": 0.05381299698364561, "learning_rate": 6.060046749205334e-05, "loss": 0.8421, "step": 182590 }, { "epoch": 3.2058147088256463, "grad_norm": 0.054385987407072296, "learning_rate": 6.059029419174587e-05, "loss": 0.8372, "step": 182600 }, { "epoch": 3.205990273705648, "grad_norm": 0.06776046142586827, "learning_rate": 6.058012140343716e-05, "loss": 0.8432, "step": 182610 }, { "epoch": 3.2061658385856493, "grad_norm": 0.05906823183334407, "learning_rate": 6.056994912725605e-05, "loss": 0.8364, "step": 182620 }, { "epoch": 3.2063414034656508, "grad_norm": 0.04943058099345212, "learning_rate": 6.055977736333146e-05, "loss": 0.8384, "step": 182630 }, { "epoch": 3.2065169683456523, "grad_norm": 0.04654087457060079, "learning_rate": 6.054960611179221e-05, "loss": 0.8386, "step": 182640 }, { "epoch": 3.2066925332256537, "grad_norm": 0.056219770291054516, "learning_rate": 6.0539435372767214e-05, "loss": 0.8433, "step": 182650 }, { "epoch": 3.2068680981056548, "grad_norm": 0.05576134664981089, "learning_rate": 6.052926514638534e-05, "loss": 0.8489, "step": 182660 }, { "epoch": 3.2070436629856562, "grad_norm": 0.08577693411709156, "learning_rate": 6.051909543277537e-05, "loss": 0.8409, "step": 182670 }, { "epoch": 3.2072192278656577, "grad_norm": 0.04872839184403289, "learning_rate": 6.0508926232066276e-05, "loss": 0.8401, "step": 182680 }, { "epoch": 3.207394792745659, "grad_norm": 0.05544485125890247, "learning_rate": 6.049875754438686e-05, "loss": 0.8466, "step": 182690 }, { "epoch": 3.2075703576256607, "grad_norm": 0.061754028340142646, "learning_rate": 6.048858936986595e-05, "loss": 0.8435, "step": 182700 }, { "epoch": 3.207745922505662, "grad_norm": 0.057559244784733676, "learning_rate": 6.0478421708632315e-05, "loss": 0.8404, "step": 182710 }, { "epoch": 3.207921487385663, "grad_norm": 0.05004118082966457, "learning_rate": 6.046825456081488e-05, "loss": 0.8488, "step": 182720 }, { "epoch": 3.2080970522656647, "grad_norm": 0.06973176060086556, "learning_rate": 6.045808792654244e-05, "loss": 0.8391, "step": 182730 }, { "epoch": 3.208272617145666, "grad_norm": 0.055168568862335704, "learning_rate": 6.0447921805943764e-05, "loss": 0.8422, "step": 182740 }, { "epoch": 3.2084481820256676, "grad_norm": 0.04871311466032282, "learning_rate": 6.0437756199147724e-05, "loss": 0.8386, "step": 182750 }, { "epoch": 3.208623746905669, "grad_norm": 0.0719391803165989, "learning_rate": 6.0427591106283064e-05, "loss": 0.8387, "step": 182760 }, { "epoch": 3.2087993117856706, "grad_norm": 0.05509128335042559, "learning_rate": 6.041742652747858e-05, "loss": 0.8384, "step": 182770 }, { "epoch": 3.2089748766656716, "grad_norm": 0.05394860983339904, "learning_rate": 6.040726246286312e-05, "loss": 0.8461, "step": 182780 }, { "epoch": 3.209150441545673, "grad_norm": 0.048007156265003924, "learning_rate": 6.039709891256542e-05, "loss": 0.842, "step": 182790 }, { "epoch": 3.2093260064256746, "grad_norm": 0.04644788340640933, "learning_rate": 6.0386935876714254e-05, "loss": 0.842, "step": 182800 }, { "epoch": 3.209501571305676, "grad_norm": 0.04545644412955526, "learning_rate": 6.0376773355438376e-05, "loss": 0.852, "step": 182810 }, { "epoch": 3.2096771361856775, "grad_norm": 0.1168902964683629, "learning_rate": 6.036661134886659e-05, "loss": 0.8403, "step": 182820 }, { "epoch": 3.209852701065679, "grad_norm": 0.053658159274901106, "learning_rate": 6.035644985712762e-05, "loss": 0.839, "step": 182830 }, { "epoch": 3.21002826594568, "grad_norm": 0.06305368560410879, "learning_rate": 6.034628888035018e-05, "loss": 0.8391, "step": 182840 }, { "epoch": 3.2102038308256815, "grad_norm": 0.05813728050827306, "learning_rate": 6.0336128418663086e-05, "loss": 0.8423, "step": 182850 }, { "epoch": 3.210379395705683, "grad_norm": 0.06015076780971664, "learning_rate": 6.032596847219506e-05, "loss": 0.8316, "step": 182860 }, { "epoch": 3.2105549605856845, "grad_norm": 0.0591441269570068, "learning_rate": 6.03158090410748e-05, "loss": 0.8418, "step": 182870 }, { "epoch": 3.210730525465686, "grad_norm": 0.06019756228224484, "learning_rate": 6.030565012543099e-05, "loss": 0.8421, "step": 182880 }, { "epoch": 3.2109060903456874, "grad_norm": 0.05665530678958933, "learning_rate": 6.029549172539243e-05, "loss": 0.8417, "step": 182890 }, { "epoch": 3.2110816552256884, "grad_norm": 0.04993936186851527, "learning_rate": 6.02853338410878e-05, "loss": 0.835, "step": 182900 }, { "epoch": 3.21125722010569, "grad_norm": 0.04339131722134552, "learning_rate": 6.027517647264577e-05, "loss": 0.8386, "step": 182910 }, { "epoch": 3.2114327849856914, "grad_norm": 0.05487788614460194, "learning_rate": 6.026501962019506e-05, "loss": 0.8413, "step": 182920 }, { "epoch": 3.211608349865693, "grad_norm": 0.07762923751623055, "learning_rate": 6.025486328386437e-05, "loss": 0.8412, "step": 182930 }, { "epoch": 3.2117839147456944, "grad_norm": 0.07884404565607263, "learning_rate": 6.024470746378235e-05, "loss": 0.8438, "step": 182940 }, { "epoch": 3.211959479625696, "grad_norm": 0.06119657598579873, "learning_rate": 6.023455216007772e-05, "loss": 0.8441, "step": 182950 }, { "epoch": 3.212135044505697, "grad_norm": 0.05136704222626834, "learning_rate": 6.022439737287913e-05, "loss": 0.8381, "step": 182960 }, { "epoch": 3.2123106093856983, "grad_norm": 0.06293635023686944, "learning_rate": 6.0214243102315225e-05, "loss": 0.8405, "step": 182970 }, { "epoch": 3.2124861742657, "grad_norm": 0.050905877559539466, "learning_rate": 6.020408934851468e-05, "loss": 0.839, "step": 182980 }, { "epoch": 3.2126617391457013, "grad_norm": 0.07580665417523146, "learning_rate": 6.0193936111606144e-05, "loss": 0.8417, "step": 182990 }, { "epoch": 3.212837304025703, "grad_norm": 0.055753103211993865, "learning_rate": 6.0183783391718265e-05, "loss": 0.847, "step": 183000 }, { "epoch": 3.2130128689057043, "grad_norm": 0.05013370899580317, "learning_rate": 6.017363118897963e-05, "loss": 0.8477, "step": 183010 }, { "epoch": 3.2131884337857057, "grad_norm": 0.05541218074631912, "learning_rate": 6.016347950351895e-05, "loss": 0.8348, "step": 183020 }, { "epoch": 3.2133639986657068, "grad_norm": 0.04276668118115214, "learning_rate": 6.015332833546484e-05, "loss": 0.8476, "step": 183030 }, { "epoch": 3.2135395635457082, "grad_norm": 0.04649889202360455, "learning_rate": 6.014317768494586e-05, "loss": 0.8396, "step": 183040 }, { "epoch": 3.2137151284257097, "grad_norm": 0.04616187710762655, "learning_rate": 6.0133027552090616e-05, "loss": 0.8383, "step": 183050 }, { "epoch": 3.213890693305711, "grad_norm": 0.05224692051569096, "learning_rate": 6.0122877937027776e-05, "loss": 0.8407, "step": 183060 }, { "epoch": 3.2140662581857127, "grad_norm": 0.05203576035195779, "learning_rate": 6.011272883988592e-05, "loss": 0.8402, "step": 183070 }, { "epoch": 3.2142418230657137, "grad_norm": 0.04434661258853681, "learning_rate": 6.010258026079359e-05, "loss": 0.849, "step": 183080 }, { "epoch": 3.214417387945715, "grad_norm": 0.04715916164946819, "learning_rate": 6.0092432199879435e-05, "loss": 0.8412, "step": 183090 }, { "epoch": 3.2145929528257167, "grad_norm": 0.04933349538880327, "learning_rate": 6.0082284657272e-05, "loss": 0.8342, "step": 183100 }, { "epoch": 3.214768517705718, "grad_norm": 0.0474979508663132, "learning_rate": 6.0072137633099826e-05, "loss": 0.8473, "step": 183110 }, { "epoch": 3.2149440825857196, "grad_norm": 0.05649803995971455, "learning_rate": 6.006199112749155e-05, "loss": 0.8491, "step": 183120 }, { "epoch": 3.215119647465721, "grad_norm": 0.050262384849381996, "learning_rate": 6.005184514057568e-05, "loss": 0.8414, "step": 183130 }, { "epoch": 3.2152952123457226, "grad_norm": 0.05834383015721292, "learning_rate": 6.00416996724808e-05, "loss": 0.8358, "step": 183140 }, { "epoch": 3.2154707772257236, "grad_norm": 0.044583025618811827, "learning_rate": 6.003155472333542e-05, "loss": 0.8387, "step": 183150 }, { "epoch": 3.215646342105725, "grad_norm": 0.05082844261878605, "learning_rate": 6.002141029326809e-05, "loss": 0.8375, "step": 183160 }, { "epoch": 3.2158219069857266, "grad_norm": 0.05481453197525674, "learning_rate": 6.001126638240735e-05, "loss": 0.8431, "step": 183170 }, { "epoch": 3.215997471865728, "grad_norm": 0.05106928673592324, "learning_rate": 6.0001122990881684e-05, "loss": 0.8423, "step": 183180 }, { "epoch": 3.2161730367457295, "grad_norm": 0.04485056603545711, "learning_rate": 5.9990980118819695e-05, "loss": 0.8404, "step": 183190 }, { "epoch": 3.216348601625731, "grad_norm": 0.0668341130987412, "learning_rate": 5.998083776634985e-05, "loss": 0.8332, "step": 183200 }, { "epoch": 3.216524166505732, "grad_norm": 0.058304871266889456, "learning_rate": 5.997069593360064e-05, "loss": 0.8407, "step": 183210 }, { "epoch": 3.2166997313857335, "grad_norm": 0.061452219472675984, "learning_rate": 5.996055462070054e-05, "loss": 0.8389, "step": 183220 }, { "epoch": 3.216875296265735, "grad_norm": 0.06089344124980045, "learning_rate": 5.9950413827778116e-05, "loss": 0.8452, "step": 183230 }, { "epoch": 3.2170508611457365, "grad_norm": 0.061227601815809975, "learning_rate": 5.994027355496181e-05, "loss": 0.846, "step": 183240 }, { "epoch": 3.217226426025738, "grad_norm": 0.051034790972402815, "learning_rate": 5.9930133802380095e-05, "loss": 0.838, "step": 183250 }, { "epoch": 3.2174019909057394, "grad_norm": 0.04960386533592311, "learning_rate": 5.991999457016148e-05, "loss": 0.8376, "step": 183260 }, { "epoch": 3.2175775557857405, "grad_norm": 0.05516100860356233, "learning_rate": 5.9909855858434394e-05, "loss": 0.8427, "step": 183270 }, { "epoch": 3.217753120665742, "grad_norm": 0.05353342041424053, "learning_rate": 5.989971766732728e-05, "loss": 0.8436, "step": 183280 }, { "epoch": 3.2179286855457434, "grad_norm": 0.04606434128896412, "learning_rate": 5.988957999696865e-05, "loss": 0.8449, "step": 183290 }, { "epoch": 3.218104250425745, "grad_norm": 0.051861321732415, "learning_rate": 5.987944284748693e-05, "loss": 0.8422, "step": 183300 }, { "epoch": 3.2182798153057464, "grad_norm": 0.04829771587866355, "learning_rate": 5.986930621901055e-05, "loss": 0.8406, "step": 183310 }, { "epoch": 3.218455380185748, "grad_norm": 0.0480527520069731, "learning_rate": 5.9859170111667907e-05, "loss": 0.8402, "step": 183320 }, { "epoch": 3.218630945065749, "grad_norm": 0.050250119059590884, "learning_rate": 5.98490345255875e-05, "loss": 0.8387, "step": 183330 }, { "epoch": 3.2188065099457503, "grad_norm": 0.04987054380135817, "learning_rate": 5.98388994608977e-05, "loss": 0.8478, "step": 183340 }, { "epoch": 3.218982074825752, "grad_norm": 0.04277976154956029, "learning_rate": 5.98287649177269e-05, "loss": 0.8395, "step": 183350 }, { "epoch": 3.2191576397057533, "grad_norm": 0.05821579581173413, "learning_rate": 5.9818630896203575e-05, "loss": 0.8439, "step": 183360 }, { "epoch": 3.219333204585755, "grad_norm": 0.05080756272594297, "learning_rate": 5.980849739645612e-05, "loss": 0.8371, "step": 183370 }, { "epoch": 3.2195087694657563, "grad_norm": 0.0522340925631416, "learning_rate": 5.979836441861284e-05, "loss": 0.8452, "step": 183380 }, { "epoch": 3.2196843343457573, "grad_norm": 0.08372130266599613, "learning_rate": 5.978823196280219e-05, "loss": 0.832, "step": 183390 }, { "epoch": 3.2198598992257588, "grad_norm": 0.045802601799839464, "learning_rate": 5.977810002915257e-05, "loss": 0.8338, "step": 183400 }, { "epoch": 3.2200354641057602, "grad_norm": 0.05547046421063146, "learning_rate": 5.976796861779229e-05, "loss": 0.8341, "step": 183410 }, { "epoch": 3.2202110289857617, "grad_norm": 0.04975963732296029, "learning_rate": 5.975783772884976e-05, "loss": 0.8401, "step": 183420 }, { "epoch": 3.220386593865763, "grad_norm": 0.05661889257578196, "learning_rate": 5.974770736245332e-05, "loss": 0.8376, "step": 183430 }, { "epoch": 3.2205621587457647, "grad_norm": 0.06830168258314066, "learning_rate": 5.973757751873138e-05, "loss": 0.8439, "step": 183440 }, { "epoch": 3.2207377236257657, "grad_norm": 0.0773152281467056, "learning_rate": 5.972744819781218e-05, "loss": 0.8453, "step": 183450 }, { "epoch": 3.220913288505767, "grad_norm": 0.06231498025060511, "learning_rate": 5.971731939982413e-05, "loss": 0.8359, "step": 183460 }, { "epoch": 3.2210888533857687, "grad_norm": 0.05863489675395209, "learning_rate": 5.970719112489561e-05, "loss": 0.8392, "step": 183470 }, { "epoch": 3.22126441826577, "grad_norm": 0.060568281960057514, "learning_rate": 5.9697063373154836e-05, "loss": 0.842, "step": 183480 }, { "epoch": 3.2214399831457716, "grad_norm": 0.055324290930755624, "learning_rate": 5.96869361447302e-05, "loss": 0.8409, "step": 183490 }, { "epoch": 3.221615548025773, "grad_norm": 0.05322967844418525, "learning_rate": 5.9676809439749984e-05, "loss": 0.8386, "step": 183500 }, { "epoch": 3.221791112905774, "grad_norm": 0.06601640520249595, "learning_rate": 5.966668325834257e-05, "loss": 0.8502, "step": 183510 }, { "epoch": 3.2219666777857756, "grad_norm": 0.06521150238311982, "learning_rate": 5.965655760063614e-05, "loss": 0.8431, "step": 183520 }, { "epoch": 3.222142242665777, "grad_norm": 0.05670593922023839, "learning_rate": 5.964643246675905e-05, "loss": 0.8437, "step": 183530 }, { "epoch": 3.2223178075457786, "grad_norm": 0.043522051019804074, "learning_rate": 5.963630785683966e-05, "loss": 0.8408, "step": 183540 }, { "epoch": 3.22249337242578, "grad_norm": 0.05918710649010191, "learning_rate": 5.96261837710061e-05, "loss": 0.8476, "step": 183550 }, { "epoch": 3.2226689373057815, "grad_norm": 0.06887999560193957, "learning_rate": 5.9616060209386725e-05, "loss": 0.8367, "step": 183560 }, { "epoch": 3.2228445021857826, "grad_norm": 0.05233673931832359, "learning_rate": 5.960593717210985e-05, "loss": 0.8379, "step": 183570 }, { "epoch": 3.223020067065784, "grad_norm": 0.051320575675150686, "learning_rate": 5.959581465930365e-05, "loss": 0.8353, "step": 183580 }, { "epoch": 3.2231956319457855, "grad_norm": 0.04265433727174777, "learning_rate": 5.958569267109639e-05, "loss": 0.853, "step": 183590 }, { "epoch": 3.223371196825787, "grad_norm": 0.04164991691908554, "learning_rate": 5.957557120761637e-05, "loss": 0.8373, "step": 183600 }, { "epoch": 3.2235467617057885, "grad_norm": 0.06736083235494056, "learning_rate": 5.9565450268991807e-05, "loss": 0.8452, "step": 183610 }, { "epoch": 3.22372232658579, "grad_norm": 0.04754517686004884, "learning_rate": 5.9555329855350914e-05, "loss": 0.842, "step": 183620 }, { "epoch": 3.223897891465791, "grad_norm": 0.04910841714197757, "learning_rate": 5.954520996682194e-05, "loss": 0.8436, "step": 183630 }, { "epoch": 3.2240734563457925, "grad_norm": 0.045219140899869475, "learning_rate": 5.953509060353312e-05, "loss": 0.8482, "step": 183640 }, { "epoch": 3.224249021225794, "grad_norm": 0.05286420791951403, "learning_rate": 5.9524971765612614e-05, "loss": 0.8423, "step": 183650 }, { "epoch": 3.2244245861057954, "grad_norm": 0.06390545481050647, "learning_rate": 5.951485345318867e-05, "loss": 0.8321, "step": 183660 }, { "epoch": 3.224600150985797, "grad_norm": 0.04712143559383382, "learning_rate": 5.9504735666389485e-05, "loss": 0.841, "step": 183670 }, { "epoch": 3.2247757158657984, "grad_norm": 0.05421173545578668, "learning_rate": 5.9494618405343294e-05, "loss": 0.8401, "step": 183680 }, { "epoch": 3.2249512807458, "grad_norm": 0.04399590813400492, "learning_rate": 5.948450167017819e-05, "loss": 0.8388, "step": 183690 }, { "epoch": 3.225126845625801, "grad_norm": 0.06445653695704215, "learning_rate": 5.947438546102242e-05, "loss": 0.8385, "step": 183700 }, { "epoch": 3.2253024105058024, "grad_norm": 0.05765108368512892, "learning_rate": 5.946426977800419e-05, "loss": 0.8435, "step": 183710 }, { "epoch": 3.225477975385804, "grad_norm": 0.054151411227574595, "learning_rate": 5.945415462125157e-05, "loss": 0.8416, "step": 183720 }, { "epoch": 3.2256535402658053, "grad_norm": 0.04926472278299313, "learning_rate": 5.944403999089276e-05, "loss": 0.8359, "step": 183730 }, { "epoch": 3.225829105145807, "grad_norm": 0.04660675254526335, "learning_rate": 5.9433925887055974e-05, "loss": 0.8435, "step": 183740 }, { "epoch": 3.226004670025808, "grad_norm": 0.052790955907457665, "learning_rate": 5.942381230986929e-05, "loss": 0.8413, "step": 183750 }, { "epoch": 3.2261802349058093, "grad_norm": 0.07922403742304256, "learning_rate": 5.941369925946086e-05, "loss": 0.8426, "step": 183760 }, { "epoch": 3.2263557997858108, "grad_norm": 0.06250791351888607, "learning_rate": 5.940358673595883e-05, "loss": 0.8317, "step": 183770 }, { "epoch": 3.2265313646658123, "grad_norm": 0.062449535828149556, "learning_rate": 5.939347473949137e-05, "loss": 0.8436, "step": 183780 }, { "epoch": 3.2267069295458137, "grad_norm": 0.046780172083255384, "learning_rate": 5.938336327018653e-05, "loss": 0.8377, "step": 183790 }, { "epoch": 3.226882494425815, "grad_norm": 0.049973605335239, "learning_rate": 5.9373252328172424e-05, "loss": 0.8388, "step": 183800 }, { "epoch": 3.2270580593058167, "grad_norm": 0.04759958854193068, "learning_rate": 5.936314191357727e-05, "loss": 0.8383, "step": 183810 }, { "epoch": 3.2272336241858177, "grad_norm": 0.05181866474913734, "learning_rate": 5.935303202652901e-05, "loss": 0.843, "step": 183820 }, { "epoch": 3.227409189065819, "grad_norm": 0.06207191502647553, "learning_rate": 5.934292266715584e-05, "loss": 0.8334, "step": 183830 }, { "epoch": 3.2275847539458207, "grad_norm": 0.05897047753666602, "learning_rate": 5.933281383558581e-05, "loss": 0.8408, "step": 183840 }, { "epoch": 3.227760318825822, "grad_norm": 0.04161147693416526, "learning_rate": 5.9322705531947066e-05, "loss": 0.8513, "step": 183850 }, { "epoch": 3.2279358837058236, "grad_norm": 0.05788686137262993, "learning_rate": 5.9312597756367574e-05, "loss": 0.8417, "step": 183860 }, { "epoch": 3.2281114485858247, "grad_norm": 0.060436752803791634, "learning_rate": 5.9302490508975474e-05, "loss": 0.8438, "step": 183870 }, { "epoch": 3.228287013465826, "grad_norm": 0.05148034086205373, "learning_rate": 5.929238378989883e-05, "loss": 0.8408, "step": 183880 }, { "epoch": 3.2284625783458276, "grad_norm": 0.04967402586005994, "learning_rate": 5.9282277599265656e-05, "loss": 0.8504, "step": 183890 }, { "epoch": 3.228638143225829, "grad_norm": 0.05365496498681879, "learning_rate": 5.9272171937204015e-05, "loss": 0.8414, "step": 183900 }, { "epoch": 3.2288137081058306, "grad_norm": 0.05840809502354821, "learning_rate": 5.926206680384199e-05, "loss": 0.8364, "step": 183910 }, { "epoch": 3.228989272985832, "grad_norm": 0.04830722889911971, "learning_rate": 5.925196219930754e-05, "loss": 0.8484, "step": 183920 }, { "epoch": 3.2291648378658335, "grad_norm": 0.04576862168179371, "learning_rate": 5.924185812372873e-05, "loss": 0.8446, "step": 183930 }, { "epoch": 3.2293404027458346, "grad_norm": 0.0672394758396955, "learning_rate": 5.923175457723358e-05, "loss": 0.8451, "step": 183940 }, { "epoch": 3.229515967625836, "grad_norm": 0.05491449487091906, "learning_rate": 5.922165155995013e-05, "loss": 0.8419, "step": 183950 }, { "epoch": 3.2296915325058375, "grad_norm": 0.06693796941025162, "learning_rate": 5.921154907200635e-05, "loss": 0.845, "step": 183960 }, { "epoch": 3.229867097385839, "grad_norm": 0.05918360744421931, "learning_rate": 5.9201447113530225e-05, "loss": 0.8344, "step": 183970 }, { "epoch": 3.2300426622658405, "grad_norm": 0.04974892688485771, "learning_rate": 5.919134568464983e-05, "loss": 0.8398, "step": 183980 }, { "epoch": 3.230218227145842, "grad_norm": 0.10964720098456787, "learning_rate": 5.9181244785493065e-05, "loss": 0.8348, "step": 183990 }, { "epoch": 3.230393792025843, "grad_norm": 0.052649441345362126, "learning_rate": 5.917114441618793e-05, "loss": 0.8362, "step": 184000 }, { "epoch": 3.2305693569058445, "grad_norm": 0.05241615329197365, "learning_rate": 5.916104457686241e-05, "loss": 0.84, "step": 184010 }, { "epoch": 3.230744921785846, "grad_norm": 0.05917770832986534, "learning_rate": 5.915094526764452e-05, "loss": 0.8421, "step": 184020 }, { "epoch": 3.2309204866658474, "grad_norm": 0.050416079272688534, "learning_rate": 5.914084648866213e-05, "loss": 0.8526, "step": 184030 }, { "epoch": 3.231096051545849, "grad_norm": 0.05041624212090278, "learning_rate": 5.9130748240043246e-05, "loss": 0.8394, "step": 184040 }, { "epoch": 3.2312716164258504, "grad_norm": 0.044873587279064264, "learning_rate": 5.9120650521915845e-05, "loss": 0.8393, "step": 184050 }, { "epoch": 3.2314471813058514, "grad_norm": 0.05427813140784777, "learning_rate": 5.911055333440778e-05, "loss": 0.8384, "step": 184060 }, { "epoch": 3.231622746185853, "grad_norm": 0.050351633404048114, "learning_rate": 5.910045667764704e-05, "loss": 0.834, "step": 184070 }, { "epoch": 3.2317983110658544, "grad_norm": 0.06297068283304759, "learning_rate": 5.909036055176157e-05, "loss": 0.8344, "step": 184080 }, { "epoch": 3.231973875945856, "grad_norm": 0.052257075494015794, "learning_rate": 5.908026495687923e-05, "loss": 0.8326, "step": 184090 }, { "epoch": 3.2321494408258573, "grad_norm": 0.057869963405787, "learning_rate": 5.907016989312798e-05, "loss": 0.8326, "step": 184100 }, { "epoch": 3.232325005705859, "grad_norm": 0.05497753159713663, "learning_rate": 5.9060075360635714e-05, "loss": 0.8277, "step": 184110 }, { "epoch": 3.23250057058586, "grad_norm": 0.05616445444858763, "learning_rate": 5.904998135953035e-05, "loss": 0.8384, "step": 184120 }, { "epoch": 3.2326761354658613, "grad_norm": 0.07819876404286732, "learning_rate": 5.903988788993973e-05, "loss": 0.8426, "step": 184130 }, { "epoch": 3.2328517003458628, "grad_norm": 0.04951856588017169, "learning_rate": 5.902979495199178e-05, "loss": 0.8362, "step": 184140 }, { "epoch": 3.2330272652258643, "grad_norm": 0.06437870821698817, "learning_rate": 5.901970254581439e-05, "loss": 0.8363, "step": 184150 }, { "epoch": 3.2332028301058657, "grad_norm": 0.0498382752237453, "learning_rate": 5.90096106715354e-05, "loss": 0.8432, "step": 184160 }, { "epoch": 3.233378394985867, "grad_norm": 0.049996434389085284, "learning_rate": 5.8999519329282685e-05, "loss": 0.8396, "step": 184170 }, { "epoch": 3.2335539598658682, "grad_norm": 0.05859343670168666, "learning_rate": 5.89894285191841e-05, "loss": 0.8462, "step": 184180 }, { "epoch": 3.2337295247458697, "grad_norm": 0.05382630502868404, "learning_rate": 5.897933824136755e-05, "loss": 0.8369, "step": 184190 }, { "epoch": 3.233905089625871, "grad_norm": 0.04714161237565076, "learning_rate": 5.8969248495960805e-05, "loss": 0.8438, "step": 184200 }, { "epoch": 3.2340806545058727, "grad_norm": 0.07707918768868491, "learning_rate": 5.8959159283091724e-05, "loss": 0.8378, "step": 184210 }, { "epoch": 3.234256219385874, "grad_norm": 0.055841691031139284, "learning_rate": 5.894907060288819e-05, "loss": 0.8366, "step": 184220 }, { "epoch": 3.2344317842658756, "grad_norm": 0.04772009713756977, "learning_rate": 5.893898245547798e-05, "loss": 0.8458, "step": 184230 }, { "epoch": 3.2346073491458767, "grad_norm": 0.051881256056288294, "learning_rate": 5.892889484098888e-05, "loss": 0.8346, "step": 184240 }, { "epoch": 3.234782914025878, "grad_norm": 0.05475909407833451, "learning_rate": 5.8918807759548825e-05, "loss": 0.8452, "step": 184250 }, { "epoch": 3.2349584789058796, "grad_norm": 0.05465574307208287, "learning_rate": 5.8908721211285475e-05, "loss": 0.8529, "step": 184260 }, { "epoch": 3.235134043785881, "grad_norm": 0.05144556831330523, "learning_rate": 5.8898635196326714e-05, "loss": 0.8423, "step": 184270 }, { "epoch": 3.2353096086658826, "grad_norm": 0.053965417728487015, "learning_rate": 5.888854971480029e-05, "loss": 0.8318, "step": 184280 }, { "epoch": 3.235485173545884, "grad_norm": 0.0461011939652606, "learning_rate": 5.8878464766834076e-05, "loss": 0.8396, "step": 184290 }, { "epoch": 3.235660738425885, "grad_norm": 0.05123982242953701, "learning_rate": 5.886838035255573e-05, "loss": 0.8436, "step": 184300 }, { "epoch": 3.2358363033058866, "grad_norm": 0.0779615933800103, "learning_rate": 5.8858296472093076e-05, "loss": 0.8447, "step": 184310 }, { "epoch": 3.236011868185888, "grad_norm": 0.050231680794836475, "learning_rate": 5.8848213125573934e-05, "loss": 0.8404, "step": 184320 }, { "epoch": 3.2361874330658895, "grad_norm": 0.05149228798870723, "learning_rate": 5.883813031312596e-05, "loss": 0.8399, "step": 184330 }, { "epoch": 3.236362997945891, "grad_norm": 0.05519220327246756, "learning_rate": 5.882804803487697e-05, "loss": 0.8324, "step": 184340 }, { "epoch": 3.2365385628258925, "grad_norm": 0.04384476424110565, "learning_rate": 5.881796629095467e-05, "loss": 0.8424, "step": 184350 }, { "epoch": 3.2367141277058935, "grad_norm": 0.06203020701494398, "learning_rate": 5.880788508148688e-05, "loss": 0.8332, "step": 184360 }, { "epoch": 3.236889692585895, "grad_norm": 0.06469797748721112, "learning_rate": 5.879780440660122e-05, "loss": 0.8457, "step": 184370 }, { "epoch": 3.2370652574658965, "grad_norm": 0.051961817527365436, "learning_rate": 5.878772426642549e-05, "loss": 0.8421, "step": 184380 }, { "epoch": 3.237240822345898, "grad_norm": 0.0594154931507186, "learning_rate": 5.877764466108741e-05, "loss": 0.8471, "step": 184390 }, { "epoch": 3.2374163872258994, "grad_norm": 0.06481084755526148, "learning_rate": 5.8767565590714605e-05, "loss": 0.8389, "step": 184400 }, { "epoch": 3.237591952105901, "grad_norm": 0.06432595440013829, "learning_rate": 5.8757487055434856e-05, "loss": 0.8475, "step": 184410 }, { "epoch": 3.237767516985902, "grad_norm": 0.04659090534825186, "learning_rate": 5.874740905537589e-05, "loss": 0.8412, "step": 184420 }, { "epoch": 3.2379430818659034, "grad_norm": 0.04836031161464134, "learning_rate": 5.873733159066529e-05, "loss": 0.8393, "step": 184430 }, { "epoch": 3.238118646745905, "grad_norm": 0.05528361472318778, "learning_rate": 5.872725466143081e-05, "loss": 0.841, "step": 184440 }, { "epoch": 3.2382942116259064, "grad_norm": 0.04645003760108794, "learning_rate": 5.871717826780011e-05, "loss": 0.837, "step": 184450 }, { "epoch": 3.238469776505908, "grad_norm": 0.049358917780806456, "learning_rate": 5.870710240990092e-05, "loss": 0.8349, "step": 184460 }, { "epoch": 3.2386453413859093, "grad_norm": 0.0560844531583116, "learning_rate": 5.869702708786078e-05, "loss": 0.8335, "step": 184470 }, { "epoch": 3.238820906265911, "grad_norm": 0.05068903589402944, "learning_rate": 5.868695230180744e-05, "loss": 0.8411, "step": 184480 }, { "epoch": 3.238996471145912, "grad_norm": 0.05642652564972516, "learning_rate": 5.8676878051868544e-05, "loss": 0.8427, "step": 184490 }, { "epoch": 3.2391720360259133, "grad_norm": 0.05797892053839419, "learning_rate": 5.866680433817169e-05, "loss": 0.8428, "step": 184500 }, { "epoch": 3.239347600905915, "grad_norm": 0.056246288668489706, "learning_rate": 5.8656731160844525e-05, "loss": 0.8539, "step": 184510 }, { "epoch": 3.2395231657859163, "grad_norm": 0.05622180333995811, "learning_rate": 5.864665852001471e-05, "loss": 0.8507, "step": 184520 }, { "epoch": 3.2396987306659177, "grad_norm": 0.06898833326503896, "learning_rate": 5.863658641580986e-05, "loss": 0.8418, "step": 184530 }, { "epoch": 3.2398742955459188, "grad_norm": 0.05215319828829471, "learning_rate": 5.862651484835756e-05, "loss": 0.8417, "step": 184540 }, { "epoch": 3.2400498604259202, "grad_norm": 0.05421786380705445, "learning_rate": 5.861644381778543e-05, "loss": 0.8395, "step": 184550 }, { "epoch": 3.2402254253059217, "grad_norm": 0.045759195715106026, "learning_rate": 5.860637332422112e-05, "loss": 0.8485, "step": 184560 }, { "epoch": 3.240400990185923, "grad_norm": 0.04435382897843194, "learning_rate": 5.859630336779215e-05, "loss": 0.8444, "step": 184570 }, { "epoch": 3.2405765550659247, "grad_norm": 0.05660823812640221, "learning_rate": 5.858623394862615e-05, "loss": 0.8406, "step": 184580 }, { "epoch": 3.240752119945926, "grad_norm": 0.05785421848903796, "learning_rate": 5.857616506685071e-05, "loss": 0.8423, "step": 184590 }, { "epoch": 3.2409276848259276, "grad_norm": 0.05798908314494106, "learning_rate": 5.856609672259337e-05, "loss": 0.8345, "step": 184600 }, { "epoch": 3.2411032497059287, "grad_norm": 0.05422642233516043, "learning_rate": 5.855602891598171e-05, "loss": 0.8319, "step": 184610 }, { "epoch": 3.24127881458593, "grad_norm": 0.05811184755464429, "learning_rate": 5.854596164714329e-05, "loss": 0.8414, "step": 184620 }, { "epoch": 3.2414543794659316, "grad_norm": 0.05118443933370086, "learning_rate": 5.8535894916205723e-05, "loss": 0.8456, "step": 184630 }, { "epoch": 3.241629944345933, "grad_norm": 0.055364233549595514, "learning_rate": 5.8525828723296475e-05, "loss": 0.8412, "step": 184640 }, { "epoch": 3.2418055092259346, "grad_norm": 0.07850177611529766, "learning_rate": 5.8515763068543084e-05, "loss": 0.8371, "step": 184650 }, { "epoch": 3.241981074105936, "grad_norm": 0.05681733528276603, "learning_rate": 5.850569795207319e-05, "loss": 0.8472, "step": 184660 }, { "epoch": 3.242156638985937, "grad_norm": 0.050619696418904604, "learning_rate": 5.8495633374014194e-05, "loss": 0.8394, "step": 184670 }, { "epoch": 3.2423322038659386, "grad_norm": 0.05734114310136564, "learning_rate": 5.848556933449367e-05, "loss": 0.8313, "step": 184680 }, { "epoch": 3.24250776874594, "grad_norm": 0.04820754608430847, "learning_rate": 5.847550583363912e-05, "loss": 0.8346, "step": 184690 }, { "epoch": 3.2426833336259415, "grad_norm": 0.051014242854346734, "learning_rate": 5.8465442871578125e-05, "loss": 0.8379, "step": 184700 }, { "epoch": 3.242858898505943, "grad_norm": 0.049970429999527013, "learning_rate": 5.845538044843807e-05, "loss": 0.8436, "step": 184710 }, { "epoch": 3.2430344633859445, "grad_norm": 0.05771402651518539, "learning_rate": 5.84453185643465e-05, "loss": 0.8437, "step": 184720 }, { "epoch": 3.2432100282659455, "grad_norm": 0.053035948859372135, "learning_rate": 5.843525721943093e-05, "loss": 0.8446, "step": 184730 }, { "epoch": 3.243385593145947, "grad_norm": 0.06857716020508244, "learning_rate": 5.842519641381876e-05, "loss": 0.8399, "step": 184740 }, { "epoch": 3.2435611580259485, "grad_norm": 0.0552432223529788, "learning_rate": 5.8415136147637525e-05, "loss": 0.8354, "step": 184750 }, { "epoch": 3.24373672290595, "grad_norm": 0.06411136386925215, "learning_rate": 5.8405076421014726e-05, "loss": 0.8368, "step": 184760 }, { "epoch": 3.2439122877859514, "grad_norm": 0.07209185800077794, "learning_rate": 5.839501723407771e-05, "loss": 0.8454, "step": 184770 }, { "epoch": 3.244087852665953, "grad_norm": 0.05476757332128607, "learning_rate": 5.838495858695401e-05, "loss": 0.845, "step": 184780 }, { "epoch": 3.244263417545954, "grad_norm": 0.0473062777620129, "learning_rate": 5.837490047977103e-05, "loss": 0.8381, "step": 184790 }, { "epoch": 3.2444389824259554, "grad_norm": 0.03990737273010354, "learning_rate": 5.8364842912656305e-05, "loss": 0.8498, "step": 184800 }, { "epoch": 3.244614547305957, "grad_norm": 0.049441061129540505, "learning_rate": 5.8354785885737126e-05, "loss": 0.8459, "step": 184810 }, { "epoch": 3.2447901121859584, "grad_norm": 0.05559455795785447, "learning_rate": 5.834472939914098e-05, "loss": 0.8364, "step": 184820 }, { "epoch": 3.24496567706596, "grad_norm": 0.066305322195853, "learning_rate": 5.8334673452995336e-05, "loss": 0.8419, "step": 184830 }, { "epoch": 3.2451412419459613, "grad_norm": 0.06140602264248671, "learning_rate": 5.832461804742751e-05, "loss": 0.839, "step": 184840 }, { "epoch": 3.2453168068259624, "grad_norm": 0.056323392874587194, "learning_rate": 5.8314563182564966e-05, "loss": 0.8476, "step": 184850 }, { "epoch": 3.245492371705964, "grad_norm": 0.04781422909238236, "learning_rate": 5.830450885853508e-05, "loss": 0.8362, "step": 184860 }, { "epoch": 3.2456679365859653, "grad_norm": 0.055807842087495736, "learning_rate": 5.8294455075465295e-05, "loss": 0.8531, "step": 184870 }, { "epoch": 3.245843501465967, "grad_norm": 0.06007871093167832, "learning_rate": 5.82844018334829e-05, "loss": 0.8465, "step": 184880 }, { "epoch": 3.2460190663459683, "grad_norm": 0.048713451734531524, "learning_rate": 5.82743491327153e-05, "loss": 0.8469, "step": 184890 }, { "epoch": 3.2461946312259697, "grad_norm": 0.05817465659163844, "learning_rate": 5.826429697328996e-05, "loss": 0.8382, "step": 184900 }, { "epoch": 3.2463701961059708, "grad_norm": 0.04884703023014615, "learning_rate": 5.82542453553341e-05, "loss": 0.8356, "step": 184910 }, { "epoch": 3.2465457609859723, "grad_norm": 0.05259910580802678, "learning_rate": 5.8244194278975164e-05, "loss": 0.8442, "step": 184920 }, { "epoch": 3.2467213258659737, "grad_norm": 0.052200217986386264, "learning_rate": 5.8234143744340505e-05, "loss": 0.8498, "step": 184930 }, { "epoch": 3.246896890745975, "grad_norm": 0.06867905382344527, "learning_rate": 5.822409375155742e-05, "loss": 0.8435, "step": 184940 }, { "epoch": 3.2470724556259767, "grad_norm": 0.05620392396916582, "learning_rate": 5.8214044300753247e-05, "loss": 0.8436, "step": 184950 }, { "epoch": 3.247248020505978, "grad_norm": 0.046873439328896066, "learning_rate": 5.8203995392055346e-05, "loss": 0.8343, "step": 184960 }, { "epoch": 3.247423585385979, "grad_norm": 0.06749060035345454, "learning_rate": 5.8193947025591055e-05, "loss": 0.8359, "step": 184970 }, { "epoch": 3.2475991502659807, "grad_norm": 0.04191353379262066, "learning_rate": 5.8183899201487625e-05, "loss": 0.8435, "step": 184980 }, { "epoch": 3.247774715145982, "grad_norm": 0.047332760115030806, "learning_rate": 5.81738519198724e-05, "loss": 0.8394, "step": 184990 }, { "epoch": 3.2479502800259836, "grad_norm": 0.046378021121788855, "learning_rate": 5.816380518087271e-05, "loss": 0.8469, "step": 185000 }, { "epoch": 3.248125844905985, "grad_norm": 0.04513987011928773, "learning_rate": 5.8153758984615795e-05, "loss": 0.8349, "step": 185010 }, { "epoch": 3.2483014097859866, "grad_norm": 0.04428683458874726, "learning_rate": 5.814371333122895e-05, "loss": 0.8384, "step": 185020 }, { "epoch": 3.2484769746659876, "grad_norm": 0.06528813048374721, "learning_rate": 5.8133668220839475e-05, "loss": 0.8401, "step": 185030 }, { "epoch": 3.248652539545989, "grad_norm": 0.052261618035757976, "learning_rate": 5.812362365357466e-05, "loss": 0.8424, "step": 185040 }, { "epoch": 3.2488281044259906, "grad_norm": 0.04737975295458155, "learning_rate": 5.811357962956173e-05, "loss": 0.8476, "step": 185050 }, { "epoch": 3.249003669305992, "grad_norm": 0.05121717719654849, "learning_rate": 5.8103536148927964e-05, "loss": 0.8417, "step": 185060 }, { "epoch": 3.2491792341859935, "grad_norm": 0.05767321643913805, "learning_rate": 5.8093493211800666e-05, "loss": 0.8414, "step": 185070 }, { "epoch": 3.249354799065995, "grad_norm": 0.04392129265701165, "learning_rate": 5.8083450818306974e-05, "loss": 0.8419, "step": 185080 }, { "epoch": 3.249530363945996, "grad_norm": 0.047658790815133203, "learning_rate": 5.807340896857419e-05, "loss": 0.8418, "step": 185090 }, { "epoch": 3.2497059288259975, "grad_norm": 0.06510381453283089, "learning_rate": 5.8063367662729596e-05, "loss": 0.8368, "step": 185100 }, { "epoch": 3.249881493705999, "grad_norm": 0.05695956768042066, "learning_rate": 5.8053326900900305e-05, "loss": 0.8277, "step": 185110 }, { "epoch": 3.2500570585860005, "grad_norm": 0.04535302140321086, "learning_rate": 5.8043286683213606e-05, "loss": 0.8409, "step": 185120 }, { "epoch": 3.250232623466002, "grad_norm": 0.053516131493588706, "learning_rate": 5.8033247009796675e-05, "loss": 0.8444, "step": 185130 }, { "epoch": 3.2504081883460034, "grad_norm": 0.05643519680250108, "learning_rate": 5.8023207880776784e-05, "loss": 0.8478, "step": 185140 }, { "epoch": 3.250583753226005, "grad_norm": 0.05261608453696099, "learning_rate": 5.8013169296281057e-05, "loss": 0.8372, "step": 185150 }, { "epoch": 3.250759318106006, "grad_norm": 0.04663071297285905, "learning_rate": 5.8003131256436716e-05, "loss": 0.8321, "step": 185160 }, { "epoch": 3.2509348829860074, "grad_norm": 0.04825829634078794, "learning_rate": 5.7993093761370973e-05, "loss": 0.8431, "step": 185170 }, { "epoch": 3.251110447866009, "grad_norm": 0.054697710732549476, "learning_rate": 5.798305681121093e-05, "loss": 0.8493, "step": 185180 }, { "epoch": 3.2512860127460104, "grad_norm": 0.0675316174041659, "learning_rate": 5.7973020406083814e-05, "loss": 0.8421, "step": 185190 }, { "epoch": 3.251461577626012, "grad_norm": 0.052279274571173526, "learning_rate": 5.7962984546116785e-05, "loss": 0.8423, "step": 185200 }, { "epoch": 3.251637142506013, "grad_norm": 0.04207109579511031, "learning_rate": 5.7952949231437e-05, "loss": 0.8417, "step": 185210 }, { "epoch": 3.2518127073860144, "grad_norm": 0.06253793711616629, "learning_rate": 5.794291446217158e-05, "loss": 0.8393, "step": 185220 }, { "epoch": 3.251988272266016, "grad_norm": 0.052118138810322304, "learning_rate": 5.7932880238447683e-05, "loss": 0.8497, "step": 185230 }, { "epoch": 3.2521638371460173, "grad_norm": 0.05573204382564439, "learning_rate": 5.7922846560392485e-05, "loss": 0.8467, "step": 185240 }, { "epoch": 3.252339402026019, "grad_norm": 0.05237773534782745, "learning_rate": 5.791281342813304e-05, "loss": 0.8472, "step": 185250 }, { "epoch": 3.2525149669060203, "grad_norm": 0.06667961707976088, "learning_rate": 5.79027808417965e-05, "loss": 0.8339, "step": 185260 }, { "epoch": 3.2526905317860217, "grad_norm": 0.052009513106569594, "learning_rate": 5.7892748801510036e-05, "loss": 0.8462, "step": 185270 }, { "epoch": 3.2528660966660228, "grad_norm": 0.045965292683245604, "learning_rate": 5.788271730740066e-05, "loss": 0.8401, "step": 185280 }, { "epoch": 3.2530416615460243, "grad_norm": 0.08632097978582033, "learning_rate": 5.7872686359595506e-05, "loss": 0.8452, "step": 185290 }, { "epoch": 3.2532172264260257, "grad_norm": 0.057351329970485566, "learning_rate": 5.78626559582217e-05, "loss": 0.848, "step": 185300 }, { "epoch": 3.253392791306027, "grad_norm": 0.06519562496277555, "learning_rate": 5.785262610340634e-05, "loss": 0.8456, "step": 185310 }, { "epoch": 3.2535683561860287, "grad_norm": 0.04351018168128444, "learning_rate": 5.784259679527644e-05, "loss": 0.8457, "step": 185320 }, { "epoch": 3.2537439210660297, "grad_norm": 0.056745545160710814, "learning_rate": 5.7832568033959114e-05, "loss": 0.8483, "step": 185330 }, { "epoch": 3.253919485946031, "grad_norm": 0.06935682952276113, "learning_rate": 5.7822539819581454e-05, "loss": 0.8379, "step": 185340 }, { "epoch": 3.2540950508260327, "grad_norm": 0.06388685439551489, "learning_rate": 5.781251215227045e-05, "loss": 0.8432, "step": 185350 }, { "epoch": 3.254270615706034, "grad_norm": 0.05585297394299238, "learning_rate": 5.7802485032153194e-05, "loss": 0.8429, "step": 185360 }, { "epoch": 3.2544461805860356, "grad_norm": 0.051116606292073465, "learning_rate": 5.7792458459356746e-05, "loss": 0.8374, "step": 185370 }, { "epoch": 3.254621745466037, "grad_norm": 0.06244052216603926, "learning_rate": 5.778243243400815e-05, "loss": 0.8503, "step": 185380 }, { "epoch": 3.2547973103460386, "grad_norm": 0.04330450940020767, "learning_rate": 5.777240695623439e-05, "loss": 0.8469, "step": 185390 }, { "epoch": 3.2549728752260396, "grad_norm": 0.048760279773084524, "learning_rate": 5.776238202616252e-05, "loss": 0.8411, "step": 185400 }, { "epoch": 3.255148440106041, "grad_norm": 0.048312944767618965, "learning_rate": 5.77523576439196e-05, "loss": 0.8451, "step": 185410 }, { "epoch": 3.2553240049860426, "grad_norm": 0.05015244285744013, "learning_rate": 5.774233380963254e-05, "loss": 0.8456, "step": 185420 }, { "epoch": 3.255499569866044, "grad_norm": 0.05157513039847175, "learning_rate": 5.773231052342841e-05, "loss": 0.8412, "step": 185430 }, { "epoch": 3.2556751347460455, "grad_norm": 0.05142603478057689, "learning_rate": 5.772228778543424e-05, "loss": 0.833, "step": 185440 }, { "epoch": 3.2558506996260466, "grad_norm": 0.05960352022202547, "learning_rate": 5.771226559577694e-05, "loss": 0.8428, "step": 185450 }, { "epoch": 3.256026264506048, "grad_norm": 0.048667485073505976, "learning_rate": 5.770224395458352e-05, "loss": 0.841, "step": 185460 }, { "epoch": 3.2562018293860495, "grad_norm": 0.05055861884782479, "learning_rate": 5.769222286198098e-05, "loss": 0.8416, "step": 185470 }, { "epoch": 3.256377394266051, "grad_norm": 0.04669156146129456, "learning_rate": 5.768220231809631e-05, "loss": 0.8468, "step": 185480 }, { "epoch": 3.2565529591460525, "grad_norm": 0.052159651633536114, "learning_rate": 5.767218232305639e-05, "loss": 0.8438, "step": 185490 }, { "epoch": 3.256728524026054, "grad_norm": 0.11910068227725813, "learning_rate": 5.766216287698825e-05, "loss": 0.8471, "step": 185500 }, { "epoch": 3.2569040889060554, "grad_norm": 0.04878110165146401, "learning_rate": 5.765214398001882e-05, "loss": 0.8511, "step": 185510 }, { "epoch": 3.2570796537860565, "grad_norm": 0.05670014040826032, "learning_rate": 5.764212563227501e-05, "loss": 0.8475, "step": 185520 }, { "epoch": 3.257255218666058, "grad_norm": 0.046545767775613006, "learning_rate": 5.763210783388378e-05, "loss": 0.8473, "step": 185530 }, { "epoch": 3.2574307835460594, "grad_norm": 0.05392337551539763, "learning_rate": 5.7622090584972044e-05, "loss": 0.8382, "step": 185540 }, { "epoch": 3.257606348426061, "grad_norm": 0.047683560907619035, "learning_rate": 5.761207388566679e-05, "loss": 0.8413, "step": 185550 }, { "epoch": 3.2577819133060624, "grad_norm": 0.05674820959105953, "learning_rate": 5.760205773609483e-05, "loss": 0.8468, "step": 185560 }, { "epoch": 3.257957478186064, "grad_norm": 0.04900432846854775, "learning_rate": 5.7592042136383094e-05, "loss": 0.8432, "step": 185570 }, { "epoch": 3.258133043066065, "grad_norm": 0.05772015990832718, "learning_rate": 5.7582027086658564e-05, "loss": 0.8376, "step": 185580 }, { "epoch": 3.2583086079460664, "grad_norm": 0.052119788049831875, "learning_rate": 5.757201258704803e-05, "loss": 0.8467, "step": 185590 }, { "epoch": 3.258484172826068, "grad_norm": 0.04946754302859257, "learning_rate": 5.756199863767842e-05, "loss": 0.8445, "step": 185600 }, { "epoch": 3.2586597377060693, "grad_norm": 0.05046214096774248, "learning_rate": 5.7551985238676645e-05, "loss": 0.8462, "step": 185610 }, { "epoch": 3.258835302586071, "grad_norm": 0.05920479887418911, "learning_rate": 5.754197239016951e-05, "loss": 0.8449, "step": 185620 }, { "epoch": 3.2590108674660723, "grad_norm": 0.055714991777823644, "learning_rate": 5.753196009228391e-05, "loss": 0.8396, "step": 185630 }, { "epoch": 3.2591864323460733, "grad_norm": 0.04555082002594911, "learning_rate": 5.7521948345146716e-05, "loss": 0.8312, "step": 185640 }, { "epoch": 3.259361997226075, "grad_norm": 0.05689903205755594, "learning_rate": 5.7511937148884786e-05, "loss": 0.8409, "step": 185650 }, { "epoch": 3.2595375621060763, "grad_norm": 0.053809294023697114, "learning_rate": 5.750192650362494e-05, "loss": 0.8445, "step": 185660 }, { "epoch": 3.2597131269860777, "grad_norm": 0.05131055307855098, "learning_rate": 5.7491916409493994e-05, "loss": 0.8357, "step": 185670 }, { "epoch": 3.259888691866079, "grad_norm": 0.04786798475118406, "learning_rate": 5.748190686661885e-05, "loss": 0.8391, "step": 185680 }, { "epoch": 3.2600642567460807, "grad_norm": 0.048317815563771266, "learning_rate": 5.747189787512626e-05, "loss": 0.8452, "step": 185690 }, { "epoch": 3.2602398216260817, "grad_norm": 0.04485997567254521, "learning_rate": 5.746188943514305e-05, "loss": 0.8465, "step": 185700 }, { "epoch": 3.260415386506083, "grad_norm": 0.0473655461107759, "learning_rate": 5.745188154679606e-05, "loss": 0.8317, "step": 185710 }, { "epoch": 3.2605909513860847, "grad_norm": 0.10016425753317294, "learning_rate": 5.74418742102121e-05, "loss": 0.8388, "step": 185720 }, { "epoch": 3.260766516266086, "grad_norm": 0.05383686721102848, "learning_rate": 5.7431867425517915e-05, "loss": 0.8369, "step": 185730 }, { "epoch": 3.2609420811460876, "grad_norm": 0.05142359213776441, "learning_rate": 5.742186119284031e-05, "loss": 0.8338, "step": 185740 }, { "epoch": 3.261117646026089, "grad_norm": 0.04441037532475336, "learning_rate": 5.7411855512306135e-05, "loss": 0.8439, "step": 185750 }, { "epoch": 3.26129321090609, "grad_norm": 0.06207295270410876, "learning_rate": 5.740185038404204e-05, "loss": 0.8424, "step": 185760 }, { "epoch": 3.2614687757860916, "grad_norm": 0.05599202172871003, "learning_rate": 5.739184580817487e-05, "loss": 0.8468, "step": 185770 }, { "epoch": 3.261644340666093, "grad_norm": 0.06266605871879978, "learning_rate": 5.73818417848314e-05, "loss": 0.8349, "step": 185780 }, { "epoch": 3.2618199055460946, "grad_norm": 0.06042389031565253, "learning_rate": 5.7371838314138326e-05, "loss": 0.8475, "step": 185790 }, { "epoch": 3.261995470426096, "grad_norm": 0.04828676600639126, "learning_rate": 5.736183539622242e-05, "loss": 0.831, "step": 185800 }, { "epoch": 3.2621710353060975, "grad_norm": 0.05221482672235218, "learning_rate": 5.735183303121041e-05, "loss": 0.8442, "step": 185810 }, { "epoch": 3.262346600186099, "grad_norm": 0.06753708709123074, "learning_rate": 5.73418312192291e-05, "loss": 0.8391, "step": 185820 }, { "epoch": 3.2625221650661, "grad_norm": 0.06017301553391267, "learning_rate": 5.73318299604051e-05, "loss": 0.8367, "step": 185830 }, { "epoch": 3.2626977299461015, "grad_norm": 0.05683615477212134, "learning_rate": 5.732182925486519e-05, "loss": 0.839, "step": 185840 }, { "epoch": 3.262873294826103, "grad_norm": 0.0457756227508442, "learning_rate": 5.731182910273611e-05, "loss": 0.8412, "step": 185850 }, { "epoch": 3.2630488597061045, "grad_norm": 0.046279280952931615, "learning_rate": 5.7301829504144496e-05, "loss": 0.8431, "step": 185860 }, { "epoch": 3.263224424586106, "grad_norm": 0.0551320051615266, "learning_rate": 5.7291830459217064e-05, "loss": 0.8395, "step": 185870 }, { "epoch": 3.263399989466107, "grad_norm": 0.06165074073784295, "learning_rate": 5.7281831968080524e-05, "loss": 0.8441, "step": 185880 }, { "epoch": 3.2635755543461085, "grad_norm": 0.055201295621117, "learning_rate": 5.727183403086161e-05, "loss": 0.8371, "step": 185890 }, { "epoch": 3.26375111922611, "grad_norm": 0.04748344545537263, "learning_rate": 5.726183664768689e-05, "loss": 0.8452, "step": 185900 }, { "epoch": 3.2639266841061114, "grad_norm": 0.052060245509255136, "learning_rate": 5.725183981868305e-05, "loss": 0.8381, "step": 185910 }, { "epoch": 3.264102248986113, "grad_norm": 0.0541208274462344, "learning_rate": 5.724184354397686e-05, "loss": 0.8286, "step": 185920 }, { "epoch": 3.2642778138661144, "grad_norm": 0.08080320001898621, "learning_rate": 5.7231847823694837e-05, "loss": 0.8383, "step": 185930 }, { "epoch": 3.264453378746116, "grad_norm": 0.05326749015172998, "learning_rate": 5.722185265796369e-05, "loss": 0.8454, "step": 185940 }, { "epoch": 3.264628943626117, "grad_norm": 0.045733688249148426, "learning_rate": 5.7211858046910127e-05, "loss": 0.8427, "step": 185950 }, { "epoch": 3.2648045085061184, "grad_norm": 0.07292807197525593, "learning_rate": 5.7201863990660656e-05, "loss": 0.8395, "step": 185960 }, { "epoch": 3.26498007338612, "grad_norm": 0.04517546821302732, "learning_rate": 5.719187048934196e-05, "loss": 0.8399, "step": 185970 }, { "epoch": 3.2651556382661213, "grad_norm": 0.04640257042157944, "learning_rate": 5.718187754308066e-05, "loss": 0.8374, "step": 185980 }, { "epoch": 3.265331203146123, "grad_norm": 0.05060230167559333, "learning_rate": 5.717188515200342e-05, "loss": 0.8408, "step": 185990 }, { "epoch": 3.265506768026124, "grad_norm": 0.10714126835793661, "learning_rate": 5.7161893316236735e-05, "loss": 0.8387, "step": 186000 }, { "epoch": 3.2656823329061253, "grad_norm": 0.05912232502399635, "learning_rate": 5.715190203590728e-05, "loss": 0.8441, "step": 186010 }, { "epoch": 3.265857897786127, "grad_norm": 0.05488336250416575, "learning_rate": 5.714191131114166e-05, "loss": 0.8534, "step": 186020 }, { "epoch": 3.2660334626661283, "grad_norm": 0.04264547981548531, "learning_rate": 5.7131921142066404e-05, "loss": 0.8434, "step": 186030 }, { "epoch": 3.2662090275461297, "grad_norm": 0.05368040174960821, "learning_rate": 5.71219315288081e-05, "loss": 0.8459, "step": 186040 }, { "epoch": 3.266384592426131, "grad_norm": 0.04745138027504958, "learning_rate": 5.711194247149334e-05, "loss": 0.8372, "step": 186050 }, { "epoch": 3.2665601573061327, "grad_norm": 0.05747660365542539, "learning_rate": 5.710195397024871e-05, "loss": 0.8499, "step": 186060 }, { "epoch": 3.2667357221861337, "grad_norm": 0.05273117752836502, "learning_rate": 5.7091966025200716e-05, "loss": 0.8407, "step": 186070 }, { "epoch": 3.266911287066135, "grad_norm": 0.057392042064361874, "learning_rate": 5.708197863647592e-05, "loss": 0.8397, "step": 186080 }, { "epoch": 3.2670868519461367, "grad_norm": 0.04270922918903289, "learning_rate": 5.70719918042009e-05, "loss": 0.8443, "step": 186090 }, { "epoch": 3.267262416826138, "grad_norm": 0.05988212214675811, "learning_rate": 5.7062005528502125e-05, "loss": 0.8447, "step": 186100 }, { "epoch": 3.2674379817061396, "grad_norm": 0.06919750214356499, "learning_rate": 5.705201980950616e-05, "loss": 0.8393, "step": 186110 }, { "epoch": 3.2676135465861407, "grad_norm": 0.06437598239554464, "learning_rate": 5.704203464733957e-05, "loss": 0.8426, "step": 186120 }, { "epoch": 3.267789111466142, "grad_norm": 0.052042929224019506, "learning_rate": 5.703205004212879e-05, "loss": 0.8377, "step": 186130 }, { "epoch": 3.2679646763461436, "grad_norm": 0.05800562966916374, "learning_rate": 5.702206599400036e-05, "loss": 0.8379, "step": 186140 }, { "epoch": 3.268140241226145, "grad_norm": 0.040337497412233844, "learning_rate": 5.70120825030808e-05, "loss": 0.8381, "step": 186150 }, { "epoch": 3.2683158061061466, "grad_norm": 0.04332642319684308, "learning_rate": 5.700209956949659e-05, "loss": 0.8402, "step": 186160 }, { "epoch": 3.268491370986148, "grad_norm": 0.07550159776425847, "learning_rate": 5.6992117193374185e-05, "loss": 0.835, "step": 186170 }, { "epoch": 3.2686669358661495, "grad_norm": 0.06588981891988491, "learning_rate": 5.698213537484009e-05, "loss": 0.8513, "step": 186180 }, { "epoch": 3.2688425007461506, "grad_norm": 0.05683352257201385, "learning_rate": 5.697215411402082e-05, "loss": 0.8412, "step": 186190 }, { "epoch": 3.269018065626152, "grad_norm": 0.05516928136513448, "learning_rate": 5.696217341104275e-05, "loss": 0.8447, "step": 186200 }, { "epoch": 3.2691936305061535, "grad_norm": 0.059858644678125156, "learning_rate": 5.695219326603238e-05, "loss": 0.8464, "step": 186210 }, { "epoch": 3.269369195386155, "grad_norm": 0.04493435228136795, "learning_rate": 5.6942213679116174e-05, "loss": 0.8481, "step": 186220 }, { "epoch": 3.2695447602661565, "grad_norm": 0.06737847812819675, "learning_rate": 5.6932234650420605e-05, "loss": 0.8389, "step": 186230 }, { "epoch": 3.2697203251461575, "grad_norm": 0.04573184302577004, "learning_rate": 5.692225618007202e-05, "loss": 0.8404, "step": 186240 }, { "epoch": 3.269895890026159, "grad_norm": 0.07596613053732061, "learning_rate": 5.6912278268196896e-05, "loss": 0.8383, "step": 186250 }, { "epoch": 3.2700714549061605, "grad_norm": 0.04443716154285498, "learning_rate": 5.69023009149217e-05, "loss": 0.8397, "step": 186260 }, { "epoch": 3.270247019786162, "grad_norm": 0.04190740081803386, "learning_rate": 5.689232412037275e-05, "loss": 0.8414, "step": 186270 }, { "epoch": 3.2704225846661634, "grad_norm": 0.054579444911523986, "learning_rate": 5.688234788467653e-05, "loss": 0.8374, "step": 186280 }, { "epoch": 3.270598149546165, "grad_norm": 0.06632244594265825, "learning_rate": 5.687237220795945e-05, "loss": 0.8474, "step": 186290 }, { "epoch": 3.2707737144261664, "grad_norm": 0.07288535196055378, "learning_rate": 5.686239709034782e-05, "loss": 0.8387, "step": 186300 }, { "epoch": 3.2709492793061674, "grad_norm": 0.05174284063290461, "learning_rate": 5.685242253196808e-05, "loss": 0.839, "step": 186310 }, { "epoch": 3.271124844186169, "grad_norm": 0.04985149082358937, "learning_rate": 5.684244853294661e-05, "loss": 0.8474, "step": 186320 }, { "epoch": 3.2713004090661704, "grad_norm": 0.056871477721104624, "learning_rate": 5.683247509340983e-05, "loss": 0.8454, "step": 186330 }, { "epoch": 3.271475973946172, "grad_norm": 0.05118480899487593, "learning_rate": 5.682250221348401e-05, "loss": 0.8419, "step": 186340 }, { "epoch": 3.2716515388261733, "grad_norm": 0.047785060797934396, "learning_rate": 5.681252989329554e-05, "loss": 0.8396, "step": 186350 }, { "epoch": 3.271827103706175, "grad_norm": 0.06505976987608493, "learning_rate": 5.680255813297084e-05, "loss": 0.8419, "step": 186360 }, { "epoch": 3.272002668586176, "grad_norm": 0.04981136808005781, "learning_rate": 5.679258693263615e-05, "loss": 0.8452, "step": 186370 }, { "epoch": 3.2721782334661773, "grad_norm": 0.04719631670591931, "learning_rate": 5.678261629241787e-05, "loss": 0.8431, "step": 186380 }, { "epoch": 3.272353798346179, "grad_norm": 0.05097143409899824, "learning_rate": 5.677264621244229e-05, "loss": 0.837, "step": 186390 }, { "epoch": 3.2725293632261803, "grad_norm": 0.06152522500817044, "learning_rate": 5.676267669283582e-05, "loss": 0.8369, "step": 186400 }, { "epoch": 3.2727049281061817, "grad_norm": 0.07401715811492679, "learning_rate": 5.675270773372466e-05, "loss": 0.8381, "step": 186410 }, { "epoch": 3.272880492986183, "grad_norm": 0.04902910509474474, "learning_rate": 5.6742739335235184e-05, "loss": 0.8416, "step": 186420 }, { "epoch": 3.2730560578661843, "grad_norm": 0.05008877873897101, "learning_rate": 5.6732771497493716e-05, "loss": 0.8376, "step": 186430 }, { "epoch": 3.2732316227461857, "grad_norm": 0.060160001375544614, "learning_rate": 5.672280422062649e-05, "loss": 0.8409, "step": 186440 }, { "epoch": 3.273407187626187, "grad_norm": 0.05394545861967373, "learning_rate": 5.6712837504759806e-05, "loss": 0.8385, "step": 186450 }, { "epoch": 3.2735827525061887, "grad_norm": 0.06255260132406686, "learning_rate": 5.6702871350020004e-05, "loss": 0.8396, "step": 186460 }, { "epoch": 3.27375831738619, "grad_norm": 0.08135779037612512, "learning_rate": 5.669290575653328e-05, "loss": 0.8394, "step": 186470 }, { "epoch": 3.2739338822661916, "grad_norm": 0.04792758217787204, "learning_rate": 5.668294072442591e-05, "loss": 0.8372, "step": 186480 }, { "epoch": 3.274109447146193, "grad_norm": 0.07117059403742455, "learning_rate": 5.6672976253824196e-05, "loss": 0.8433, "step": 186490 }, { "epoch": 3.274285012026194, "grad_norm": 0.05529422476062434, "learning_rate": 5.66630123448544e-05, "loss": 0.8456, "step": 186500 }, { "epoch": 3.2744605769061956, "grad_norm": 0.06301182521276676, "learning_rate": 5.6653048997642675e-05, "loss": 0.8469, "step": 186510 }, { "epoch": 3.274636141786197, "grad_norm": 0.05937348206901403, "learning_rate": 5.6643086212315356e-05, "loss": 0.8438, "step": 186520 }, { "epoch": 3.2748117066661986, "grad_norm": 0.04500833459840994, "learning_rate": 5.663312398899865e-05, "loss": 0.8452, "step": 186530 }, { "epoch": 3.2749872715462, "grad_norm": 0.0658713344003714, "learning_rate": 5.6623162327818725e-05, "loss": 0.8374, "step": 186540 }, { "epoch": 3.275162836426201, "grad_norm": 0.04816793249617194, "learning_rate": 5.6613201228901856e-05, "loss": 0.8408, "step": 186550 }, { "epoch": 3.2753384013062026, "grad_norm": 0.055147064490275816, "learning_rate": 5.660324069237423e-05, "loss": 0.8425, "step": 186560 }, { "epoch": 3.275513966186204, "grad_norm": 0.07943823095181865, "learning_rate": 5.659328071836209e-05, "loss": 0.8412, "step": 186570 }, { "epoch": 3.2756895310662055, "grad_norm": 0.05414803389455297, "learning_rate": 5.6583321306991535e-05, "loss": 0.8402, "step": 186580 }, { "epoch": 3.275865095946207, "grad_norm": 0.05359461834193573, "learning_rate": 5.657336245838883e-05, "loss": 0.8358, "step": 186590 }, { "epoch": 3.2760406608262085, "grad_norm": 0.06739409261872199, "learning_rate": 5.656340417268017e-05, "loss": 0.8475, "step": 186600 }, { "epoch": 3.27621622570621, "grad_norm": 0.05327922752061686, "learning_rate": 5.6553446449991666e-05, "loss": 0.844, "step": 186610 }, { "epoch": 3.276391790586211, "grad_norm": 0.05071849676249465, "learning_rate": 5.65434892904495e-05, "loss": 0.836, "step": 186620 }, { "epoch": 3.2765673554662125, "grad_norm": 0.0518113008222446, "learning_rate": 5.653353269417989e-05, "loss": 0.8473, "step": 186630 }, { "epoch": 3.276742920346214, "grad_norm": 0.048041868363595655, "learning_rate": 5.652357666130891e-05, "loss": 0.8423, "step": 186640 }, { "epoch": 3.2769184852262154, "grad_norm": 0.05802544414688372, "learning_rate": 5.651362119196273e-05, "loss": 0.8368, "step": 186650 }, { "epoch": 3.277094050106217, "grad_norm": 0.05219532630452618, "learning_rate": 5.650366628626751e-05, "loss": 0.8424, "step": 186660 }, { "epoch": 3.277269614986218, "grad_norm": 0.0478576208685752, "learning_rate": 5.649371194434937e-05, "loss": 0.8392, "step": 186670 }, { "epoch": 3.2774451798662194, "grad_norm": 0.05020620525257764, "learning_rate": 5.648375816633442e-05, "loss": 0.8394, "step": 186680 }, { "epoch": 3.277620744746221, "grad_norm": 0.04934485811918082, "learning_rate": 5.6473804952348774e-05, "loss": 0.8436, "step": 186690 }, { "epoch": 3.2777963096262224, "grad_norm": 0.052262518726053246, "learning_rate": 5.6463852302518605e-05, "loss": 0.8397, "step": 186700 }, { "epoch": 3.277971874506224, "grad_norm": 0.05392748669805188, "learning_rate": 5.6453900216969914e-05, "loss": 0.8414, "step": 186710 }, { "epoch": 3.2781474393862253, "grad_norm": 0.07390758702133021, "learning_rate": 5.644394869582885e-05, "loss": 0.8427, "step": 186720 }, { "epoch": 3.278323004266227, "grad_norm": 0.06998957417625541, "learning_rate": 5.6433997739221476e-05, "loss": 0.8508, "step": 186730 }, { "epoch": 3.278498569146228, "grad_norm": 0.050861824213740706, "learning_rate": 5.642404734727394e-05, "loss": 0.8388, "step": 186740 }, { "epoch": 3.2786741340262293, "grad_norm": 0.045019896911244334, "learning_rate": 5.6414097520112244e-05, "loss": 0.8446, "step": 186750 }, { "epoch": 3.278849698906231, "grad_norm": 0.07099640666796621, "learning_rate": 5.6404148257862466e-05, "loss": 0.8446, "step": 186760 }, { "epoch": 3.2790252637862323, "grad_norm": 0.04596960900844939, "learning_rate": 5.6394199560650715e-05, "loss": 0.8315, "step": 186770 }, { "epoch": 3.2792008286662337, "grad_norm": 0.04858793331571325, "learning_rate": 5.638425142860296e-05, "loss": 0.837, "step": 186780 }, { "epoch": 3.279376393546235, "grad_norm": 0.059003085191019176, "learning_rate": 5.63743038618453e-05, "loss": 0.836, "step": 186790 }, { "epoch": 3.2795519584262363, "grad_norm": 0.06592418383348052, "learning_rate": 5.63643568605038e-05, "loss": 0.8451, "step": 186800 }, { "epoch": 3.2797275233062377, "grad_norm": 0.052258781284756375, "learning_rate": 5.6354410424704405e-05, "loss": 0.8446, "step": 186810 }, { "epoch": 3.279903088186239, "grad_norm": 0.05133502109151789, "learning_rate": 5.63444645545732e-05, "loss": 0.8416, "step": 186820 }, { "epoch": 3.2800786530662407, "grad_norm": 0.04607504127947738, "learning_rate": 5.6334519250236165e-05, "loss": 0.8418, "step": 186830 }, { "epoch": 3.280254217946242, "grad_norm": 0.03988047650996541, "learning_rate": 5.6324574511819396e-05, "loss": 0.8445, "step": 186840 }, { "epoch": 3.2804297828262436, "grad_norm": 0.053973459554948054, "learning_rate": 5.631463033944877e-05, "loss": 0.8408, "step": 186850 }, { "epoch": 3.2806053477062447, "grad_norm": 0.0567703758472805, "learning_rate": 5.630468673325036e-05, "loss": 0.8481, "step": 186860 }, { "epoch": 3.280780912586246, "grad_norm": 0.06362483545459716, "learning_rate": 5.629474369335016e-05, "loss": 0.8396, "step": 186870 }, { "epoch": 3.2809564774662476, "grad_norm": 0.05822387893932798, "learning_rate": 5.628480121987408e-05, "loss": 0.8456, "step": 186880 }, { "epoch": 3.281132042346249, "grad_norm": 0.06803077550751671, "learning_rate": 5.627485931294816e-05, "loss": 0.8471, "step": 186890 }, { "epoch": 3.2813076072262506, "grad_norm": 0.0543322223501457, "learning_rate": 5.626491797269834e-05, "loss": 0.8514, "step": 186900 }, { "epoch": 3.2814831721062516, "grad_norm": 0.06566732779730333, "learning_rate": 5.6254977199250616e-05, "loss": 0.8437, "step": 186910 }, { "epoch": 3.281658736986253, "grad_norm": 0.05879802412360698, "learning_rate": 5.624503699273086e-05, "loss": 0.838, "step": 186920 }, { "epoch": 3.2818343018662546, "grad_norm": 0.05530500498224623, "learning_rate": 5.623509735326508e-05, "loss": 0.8394, "step": 186930 }, { "epoch": 3.282009866746256, "grad_norm": 0.0448121006421922, "learning_rate": 5.6225158280979216e-05, "loss": 0.8339, "step": 186940 }, { "epoch": 3.2821854316262575, "grad_norm": 0.05526595294773155, "learning_rate": 5.6215219775999154e-05, "loss": 0.8374, "step": 186950 }, { "epoch": 3.282360996506259, "grad_norm": 0.05875178378567468, "learning_rate": 5.6205281838450826e-05, "loss": 0.842, "step": 186960 }, { "epoch": 3.2825365613862605, "grad_norm": 0.05165983769745394, "learning_rate": 5.61953444684602e-05, "loss": 0.8364, "step": 186970 }, { "epoch": 3.2827121262662615, "grad_norm": 0.03917768271540712, "learning_rate": 5.6185407666153125e-05, "loss": 0.8294, "step": 186980 }, { "epoch": 3.282887691146263, "grad_norm": 0.06766608603252015, "learning_rate": 5.61754714316555e-05, "loss": 0.8416, "step": 186990 }, { "epoch": 3.2830632560262645, "grad_norm": 0.05180737567778106, "learning_rate": 5.616553576509327e-05, "loss": 0.8388, "step": 187000 }, { "epoch": 3.283238820906266, "grad_norm": 0.07207062357215023, "learning_rate": 5.615560066659231e-05, "loss": 0.8489, "step": 187010 }, { "epoch": 3.2834143857862674, "grad_norm": 0.04980844985412598, "learning_rate": 5.6145666136278454e-05, "loss": 0.844, "step": 187020 }, { "epoch": 3.283589950666269, "grad_norm": 0.05157749889057648, "learning_rate": 5.613573217427761e-05, "loss": 0.8405, "step": 187030 }, { "epoch": 3.28376551554627, "grad_norm": 0.04300456234405837, "learning_rate": 5.6125798780715667e-05, "loss": 0.8423, "step": 187040 }, { "epoch": 3.2839410804262714, "grad_norm": 0.05339047935064294, "learning_rate": 5.6115865955718415e-05, "loss": 0.841, "step": 187050 }, { "epoch": 3.284116645306273, "grad_norm": 0.06435475430387042, "learning_rate": 5.610593369941175e-05, "loss": 0.8464, "step": 187060 }, { "epoch": 3.2842922101862744, "grad_norm": 0.06517536003941378, "learning_rate": 5.609600201192151e-05, "loss": 0.8403, "step": 187070 }, { "epoch": 3.284467775066276, "grad_norm": 0.06384751095228076, "learning_rate": 5.608607089337357e-05, "loss": 0.8439, "step": 187080 }, { "epoch": 3.2846433399462773, "grad_norm": 0.06694685670947832, "learning_rate": 5.607614034389367e-05, "loss": 0.8373, "step": 187090 }, { "epoch": 3.2848189048262784, "grad_norm": 0.06250917105546237, "learning_rate": 5.6066210363607686e-05, "loss": 0.8429, "step": 187100 }, { "epoch": 3.28499446970628, "grad_norm": 0.05809934073776498, "learning_rate": 5.605628095264147e-05, "loss": 0.8393, "step": 187110 }, { "epoch": 3.2851700345862813, "grad_norm": 0.051564480287071335, "learning_rate": 5.6046352111120736e-05, "loss": 0.8369, "step": 187120 }, { "epoch": 3.285345599466283, "grad_norm": 0.05257378410118228, "learning_rate": 5.603642383917135e-05, "loss": 0.8351, "step": 187130 }, { "epoch": 3.2855211643462843, "grad_norm": 0.04573697934711422, "learning_rate": 5.602649613691914e-05, "loss": 0.8422, "step": 187140 }, { "epoch": 3.2856967292262858, "grad_norm": 0.04745146668131821, "learning_rate": 5.601656900448978e-05, "loss": 0.8458, "step": 187150 }, { "epoch": 3.2858722941062872, "grad_norm": 0.05172564139015285, "learning_rate": 5.600664244200913e-05, "loss": 0.8487, "step": 187160 }, { "epoch": 3.2860478589862883, "grad_norm": 0.04224056221335726, "learning_rate": 5.5996716449602936e-05, "loss": 0.838, "step": 187170 }, { "epoch": 3.2862234238662897, "grad_norm": 0.05339941364855989, "learning_rate": 5.598679102739701e-05, "loss": 0.8442, "step": 187180 }, { "epoch": 3.286398988746291, "grad_norm": 0.06465513143509148, "learning_rate": 5.597686617551702e-05, "loss": 0.8368, "step": 187190 }, { "epoch": 3.2865745536262927, "grad_norm": 0.07112539106117968, "learning_rate": 5.596694189408878e-05, "loss": 0.841, "step": 187200 }, { "epoch": 3.286750118506294, "grad_norm": 0.05811772430820987, "learning_rate": 5.5957018183238044e-05, "loss": 0.8381, "step": 187210 }, { "epoch": 3.286925683386295, "grad_norm": 0.059911351775666026, "learning_rate": 5.594709504309049e-05, "loss": 0.836, "step": 187220 }, { "epoch": 3.2871012482662967, "grad_norm": 0.04019579506089568, "learning_rate": 5.593717247377187e-05, "loss": 0.8407, "step": 187230 }, { "epoch": 3.287276813146298, "grad_norm": 0.06562877616535763, "learning_rate": 5.592725047540791e-05, "loss": 0.832, "step": 187240 }, { "epoch": 3.2874523780262996, "grad_norm": 0.054038886728761115, "learning_rate": 5.591732904812438e-05, "loss": 0.842, "step": 187250 }, { "epoch": 3.287627942906301, "grad_norm": 0.04747532471925827, "learning_rate": 5.590740819204686e-05, "loss": 0.8376, "step": 187260 }, { "epoch": 3.2878035077863026, "grad_norm": 0.05691852660089238, "learning_rate": 5.589748790730114e-05, "loss": 0.8369, "step": 187270 }, { "epoch": 3.287979072666304, "grad_norm": 0.053985330615695375, "learning_rate": 5.5887568194012926e-05, "loss": 0.8535, "step": 187280 }, { "epoch": 3.288154637546305, "grad_norm": 0.08561497074201281, "learning_rate": 5.587764905230784e-05, "loss": 0.8502, "step": 187290 }, { "epoch": 3.2883302024263066, "grad_norm": 0.048729366454349166, "learning_rate": 5.5867730482311574e-05, "loss": 0.8374, "step": 187300 }, { "epoch": 3.288505767306308, "grad_norm": 0.043693253782149, "learning_rate": 5.5857812484149876e-05, "loss": 0.8528, "step": 187310 }, { "epoch": 3.2886813321863095, "grad_norm": 0.043598563399509876, "learning_rate": 5.584789505794827e-05, "loss": 0.8476, "step": 187320 }, { "epoch": 3.288856897066311, "grad_norm": 0.04743562423430732, "learning_rate": 5.583797820383252e-05, "loss": 0.8486, "step": 187330 }, { "epoch": 3.289032461946312, "grad_norm": 0.07480102633707061, "learning_rate": 5.582806192192823e-05, "loss": 0.8351, "step": 187340 }, { "epoch": 3.2892080268263135, "grad_norm": 0.0589496036136965, "learning_rate": 5.581814621236108e-05, "loss": 0.8392, "step": 187350 }, { "epoch": 3.289383591706315, "grad_norm": 0.047323060663540716, "learning_rate": 5.5808231075256645e-05, "loss": 0.8405, "step": 187360 }, { "epoch": 3.2895591565863165, "grad_norm": 0.05233577978645099, "learning_rate": 5.579831651074059e-05, "loss": 0.8365, "step": 187370 }, { "epoch": 3.289734721466318, "grad_norm": 0.052980010620280356, "learning_rate": 5.5788402518938565e-05, "loss": 0.8434, "step": 187380 }, { "epoch": 3.2899102863463194, "grad_norm": 0.057080557268860736, "learning_rate": 5.57784890999761e-05, "loss": 0.8432, "step": 187390 }, { "epoch": 3.290085851226321, "grad_norm": 0.05177662294074474, "learning_rate": 5.576857625397885e-05, "loss": 0.8452, "step": 187400 }, { "epoch": 3.290261416106322, "grad_norm": 0.06230624225761757, "learning_rate": 5.57586639810724e-05, "loss": 0.8425, "step": 187410 }, { "epoch": 3.2904369809863234, "grad_norm": 0.07163364382135151, "learning_rate": 5.5748752281382386e-05, "loss": 0.8434, "step": 187420 }, { "epoch": 3.290612545866325, "grad_norm": 0.058888944629568216, "learning_rate": 5.573884115503432e-05, "loss": 0.844, "step": 187430 }, { "epoch": 3.2907881107463264, "grad_norm": 0.04435441844953733, "learning_rate": 5.572893060215381e-05, "loss": 0.8472, "step": 187440 }, { "epoch": 3.290963675626328, "grad_norm": 0.056817234790947975, "learning_rate": 5.571902062286647e-05, "loss": 0.8343, "step": 187450 }, { "epoch": 3.291139240506329, "grad_norm": 0.058192974020247513, "learning_rate": 5.570911121729777e-05, "loss": 0.8393, "step": 187460 }, { "epoch": 3.2913148053863304, "grad_norm": 0.054780104354581784, "learning_rate": 5.5699202385573323e-05, "loss": 0.8394, "step": 187470 }, { "epoch": 3.291490370266332, "grad_norm": 0.04258755678862435, "learning_rate": 5.568929412781868e-05, "loss": 0.8397, "step": 187480 }, { "epoch": 3.2916659351463333, "grad_norm": 0.07068096033592025, "learning_rate": 5.567938644415934e-05, "loss": 0.8377, "step": 187490 }, { "epoch": 3.291841500026335, "grad_norm": 0.04762966940001732, "learning_rate": 5.566947933472087e-05, "loss": 0.8393, "step": 187500 }, { "epoch": 3.2920170649063363, "grad_norm": 0.05147168437291164, "learning_rate": 5.565957279962877e-05, "loss": 0.8334, "step": 187510 }, { "epoch": 3.2921926297863378, "grad_norm": 0.04866774534855943, "learning_rate": 5.5649666839008595e-05, "loss": 0.8391, "step": 187520 }, { "epoch": 3.292368194666339, "grad_norm": 0.049839357051387354, "learning_rate": 5.5639761452985816e-05, "loss": 0.8435, "step": 187530 }, { "epoch": 3.2925437595463403, "grad_norm": 0.04756246915714735, "learning_rate": 5.562985664168595e-05, "loss": 0.8428, "step": 187540 }, { "epoch": 3.2927193244263417, "grad_norm": 0.07230124188213527, "learning_rate": 5.561995240523454e-05, "loss": 0.8404, "step": 187550 }, { "epoch": 3.292894889306343, "grad_norm": 0.06563483938459268, "learning_rate": 5.561004874375698e-05, "loss": 0.8414, "step": 187560 }, { "epoch": 3.2930704541863447, "grad_norm": 0.06421044620034856, "learning_rate": 5.560014565737879e-05, "loss": 0.8521, "step": 187570 }, { "epoch": 3.2932460190663457, "grad_norm": 0.06412570745770863, "learning_rate": 5.559024314622547e-05, "loss": 0.8423, "step": 187580 }, { "epoch": 3.293421583946347, "grad_norm": 0.06947587687510147, "learning_rate": 5.558034121042251e-05, "loss": 0.8367, "step": 187590 }, { "epoch": 3.2935971488263487, "grad_norm": 0.0445788494465031, "learning_rate": 5.55704398500953e-05, "loss": 0.8352, "step": 187600 }, { "epoch": 3.29377271370635, "grad_norm": 0.04701610852906054, "learning_rate": 5.556053906536932e-05, "loss": 0.8383, "step": 187610 }, { "epoch": 3.2939482785863516, "grad_norm": 0.04396656528689304, "learning_rate": 5.555063885637005e-05, "loss": 0.836, "step": 187620 }, { "epoch": 3.294123843466353, "grad_norm": 0.044531779515699456, "learning_rate": 5.554073922322287e-05, "loss": 0.8386, "step": 187630 }, { "epoch": 3.2942994083463546, "grad_norm": 0.06523604040448505, "learning_rate": 5.553084016605322e-05, "loss": 0.8334, "step": 187640 }, { "epoch": 3.2944749732263556, "grad_norm": 0.054391157492333565, "learning_rate": 5.552094168498659e-05, "loss": 0.8434, "step": 187650 }, { "epoch": 3.294650538106357, "grad_norm": 0.06794470102647865, "learning_rate": 5.55110437801483e-05, "loss": 0.8388, "step": 187660 }, { "epoch": 3.2948261029863586, "grad_norm": 0.06395131913370213, "learning_rate": 5.550114645166381e-05, "loss": 0.836, "step": 187670 }, { "epoch": 3.29500166786636, "grad_norm": 0.051100393988954045, "learning_rate": 5.54912496996585e-05, "loss": 0.8373, "step": 187680 }, { "epoch": 3.2951772327463615, "grad_norm": 0.04508244158402082, "learning_rate": 5.548135352425783e-05, "loss": 0.8393, "step": 187690 }, { "epoch": 3.2953527976263626, "grad_norm": 0.04809491347075099, "learning_rate": 5.54714579255871e-05, "loss": 0.8428, "step": 187700 }, { "epoch": 3.295528362506364, "grad_norm": 0.05651575290776306, "learning_rate": 5.5461562903771714e-05, "loss": 0.8411, "step": 187710 }, { "epoch": 3.2957039273863655, "grad_norm": 0.07140438188809123, "learning_rate": 5.545166845893709e-05, "loss": 0.8434, "step": 187720 }, { "epoch": 3.295879492266367, "grad_norm": 0.0644732535545731, "learning_rate": 5.5441774591208516e-05, "loss": 0.8528, "step": 187730 }, { "epoch": 3.2960550571463685, "grad_norm": 0.048182932065583314, "learning_rate": 5.543188130071141e-05, "loss": 0.8348, "step": 187740 }, { "epoch": 3.29623062202637, "grad_norm": 0.06174170822029935, "learning_rate": 5.542198858757108e-05, "loss": 0.8387, "step": 187750 }, { "epoch": 3.2964061869063714, "grad_norm": 0.05262396585819535, "learning_rate": 5.541209645191294e-05, "loss": 0.8448, "step": 187760 }, { "epoch": 3.2965817517863725, "grad_norm": 0.06528234692301411, "learning_rate": 5.540220489386223e-05, "loss": 0.8436, "step": 187770 }, { "epoch": 3.296757316666374, "grad_norm": 0.06034157626796728, "learning_rate": 5.539231391354432e-05, "loss": 0.8465, "step": 187780 }, { "epoch": 3.2969328815463754, "grad_norm": 0.04854458807302771, "learning_rate": 5.538242351108458e-05, "loss": 0.8428, "step": 187790 }, { "epoch": 3.297108446426377, "grad_norm": 0.07987941133072252, "learning_rate": 5.537253368660823e-05, "loss": 0.8446, "step": 187800 }, { "epoch": 3.2972840113063784, "grad_norm": 0.05619849914791502, "learning_rate": 5.5362644440240615e-05, "loss": 0.8322, "step": 187810 }, { "epoch": 3.29745957618638, "grad_norm": 0.06032507748696715, "learning_rate": 5.535275577210709e-05, "loss": 0.8446, "step": 187820 }, { "epoch": 3.297635141066381, "grad_norm": 0.06506744894887932, "learning_rate": 5.5342867682332864e-05, "loss": 0.8444, "step": 187830 }, { "epoch": 3.2978107059463824, "grad_norm": 0.04639452535362073, "learning_rate": 5.5332980171043246e-05, "loss": 0.8438, "step": 187840 }, { "epoch": 3.297986270826384, "grad_norm": 0.0544268192383158, "learning_rate": 5.532309323836352e-05, "loss": 0.8449, "step": 187850 }, { "epoch": 3.2981618357063853, "grad_norm": 0.06568795883600453, "learning_rate": 5.531320688441901e-05, "loss": 0.8403, "step": 187860 }, { "epoch": 3.298337400586387, "grad_norm": 0.07151434283656274, "learning_rate": 5.530332110933487e-05, "loss": 0.8403, "step": 187870 }, { "epoch": 3.2985129654663883, "grad_norm": 0.057430523569984356, "learning_rate": 5.529343591323642e-05, "loss": 0.835, "step": 187880 }, { "epoch": 3.2986885303463893, "grad_norm": 0.07499020170784415, "learning_rate": 5.528355129624894e-05, "loss": 0.8484, "step": 187890 }, { "epoch": 3.298864095226391, "grad_norm": 0.052709835002322965, "learning_rate": 5.527366725849757e-05, "loss": 0.8492, "step": 187900 }, { "epoch": 3.2990396601063923, "grad_norm": 0.05314284873977334, "learning_rate": 5.5263783800107604e-05, "loss": 0.8401, "step": 187910 }, { "epoch": 3.2992152249863937, "grad_norm": 0.0560056525624995, "learning_rate": 5.525390092120427e-05, "loss": 0.8408, "step": 187920 }, { "epoch": 3.2993907898663952, "grad_norm": 0.05679686001759922, "learning_rate": 5.524401862191282e-05, "loss": 0.8412, "step": 187930 }, { "epoch": 3.2995663547463967, "grad_norm": 0.05376549932183391, "learning_rate": 5.5234136902358367e-05, "loss": 0.8463, "step": 187940 }, { "epoch": 3.299741919626398, "grad_norm": 0.05932417203569761, "learning_rate": 5.522425576266618e-05, "loss": 0.8386, "step": 187950 }, { "epoch": 3.299917484506399, "grad_norm": 0.05493975897743994, "learning_rate": 5.521437520296149e-05, "loss": 0.8437, "step": 187960 }, { "epoch": 3.3000930493864007, "grad_norm": 0.04395804117943977, "learning_rate": 5.520449522336939e-05, "loss": 0.8342, "step": 187970 }, { "epoch": 3.300268614266402, "grad_norm": 0.04788884595394651, "learning_rate": 5.519461582401514e-05, "loss": 0.844, "step": 187980 }, { "epoch": 3.3004441791464036, "grad_norm": 0.06029444771406243, "learning_rate": 5.518473700502389e-05, "loss": 0.8416, "step": 187990 }, { "epoch": 3.300619744026405, "grad_norm": 0.06004690076690455, "learning_rate": 5.517485876652079e-05, "loss": 0.8392, "step": 188000 }, { "epoch": 3.300795308906406, "grad_norm": 0.08131966526544993, "learning_rate": 5.5164981108630985e-05, "loss": 0.8404, "step": 188010 }, { "epoch": 3.3009708737864076, "grad_norm": 0.04102072496427912, "learning_rate": 5.5155104031479675e-05, "loss": 0.845, "step": 188020 }, { "epoch": 3.301146438666409, "grad_norm": 0.07342626265554952, "learning_rate": 5.5145227535192024e-05, "loss": 0.8402, "step": 188030 }, { "epoch": 3.3013220035464106, "grad_norm": 0.05559361838417433, "learning_rate": 5.513535161989308e-05, "loss": 0.8338, "step": 188040 }, { "epoch": 3.301497568426412, "grad_norm": 0.04871103020926843, "learning_rate": 5.512547628570801e-05, "loss": 0.8393, "step": 188050 }, { "epoch": 3.3016731333064135, "grad_norm": 0.06311042656764082, "learning_rate": 5.511560153276201e-05, "loss": 0.8505, "step": 188060 }, { "epoch": 3.301848698186415, "grad_norm": 0.05437375004412733, "learning_rate": 5.510572736118007e-05, "loss": 0.8461, "step": 188070 }, { "epoch": 3.302024263066416, "grad_norm": 0.04468874028624708, "learning_rate": 5.509585377108738e-05, "loss": 0.8469, "step": 188080 }, { "epoch": 3.3021998279464175, "grad_norm": 0.05498567128129493, "learning_rate": 5.508598076260901e-05, "loss": 0.8437, "step": 188090 }, { "epoch": 3.302375392826419, "grad_norm": 0.051416343435312484, "learning_rate": 5.5076108335870113e-05, "loss": 0.8492, "step": 188100 }, { "epoch": 3.3025509577064205, "grad_norm": 0.061728196146596144, "learning_rate": 5.506623649099567e-05, "loss": 0.8463, "step": 188110 }, { "epoch": 3.302726522586422, "grad_norm": 0.06758036418116091, "learning_rate": 5.505636522811082e-05, "loss": 0.8312, "step": 188120 }, { "epoch": 3.302902087466423, "grad_norm": 0.051310275221777175, "learning_rate": 5.504649454734067e-05, "loss": 0.8389, "step": 188130 }, { "epoch": 3.3030776523464245, "grad_norm": 0.05643359995370061, "learning_rate": 5.5036624448810205e-05, "loss": 0.847, "step": 188140 }, { "epoch": 3.303253217226426, "grad_norm": 0.05141366009232395, "learning_rate": 5.5026754932644506e-05, "loss": 0.8364, "step": 188150 }, { "epoch": 3.3034287821064274, "grad_norm": 0.06397890437337943, "learning_rate": 5.501688599896868e-05, "loss": 0.8362, "step": 188160 }, { "epoch": 3.303604346986429, "grad_norm": 0.06519832606933068, "learning_rate": 5.5007017647907675e-05, "loss": 0.8384, "step": 188170 }, { "epoch": 3.3037799118664304, "grad_norm": 0.04711253071247089, "learning_rate": 5.499714987958658e-05, "loss": 0.8475, "step": 188180 }, { "epoch": 3.303955476746432, "grad_norm": 0.04964133054691046, "learning_rate": 5.4987282694130394e-05, "loss": 0.8451, "step": 188190 }, { "epoch": 3.304131041626433, "grad_norm": 0.084973666899913, "learning_rate": 5.4977416091664215e-05, "loss": 0.8439, "step": 188200 }, { "epoch": 3.3043066065064344, "grad_norm": 0.04522139758956372, "learning_rate": 5.4967550072312946e-05, "loss": 0.8483, "step": 188210 }, { "epoch": 3.304482171386436, "grad_norm": 0.05026725464349196, "learning_rate": 5.495768463620164e-05, "loss": 0.8322, "step": 188220 }, { "epoch": 3.3046577362664373, "grad_norm": 0.055182119301050415, "learning_rate": 5.494781978345533e-05, "loss": 0.8419, "step": 188230 }, { "epoch": 3.304833301146439, "grad_norm": 0.04988263399875774, "learning_rate": 5.4937955514198945e-05, "loss": 0.8467, "step": 188240 }, { "epoch": 3.30500886602644, "grad_norm": 0.05697099996958386, "learning_rate": 5.4928091828557474e-05, "loss": 0.8471, "step": 188250 }, { "epoch": 3.3051844309064413, "grad_norm": 0.08001571268247012, "learning_rate": 5.491822872665592e-05, "loss": 0.8457, "step": 188260 }, { "epoch": 3.305359995786443, "grad_norm": 0.049720849319713996, "learning_rate": 5.490836620861928e-05, "loss": 0.8367, "step": 188270 }, { "epoch": 3.3055355606664443, "grad_norm": 0.04756646101456504, "learning_rate": 5.489850427457242e-05, "loss": 0.84, "step": 188280 }, { "epoch": 3.3057111255464457, "grad_norm": 0.05604126297435017, "learning_rate": 5.488864292464035e-05, "loss": 0.8356, "step": 188290 }, { "epoch": 3.3058866904264472, "grad_norm": 0.04921828146842735, "learning_rate": 5.487878215894805e-05, "loss": 0.8359, "step": 188300 }, { "epoch": 3.3060622553064487, "grad_norm": 0.05226327186080261, "learning_rate": 5.486892197762039e-05, "loss": 0.8348, "step": 188310 }, { "epoch": 3.3062378201864497, "grad_norm": 0.042764462068133426, "learning_rate": 5.4859062380782334e-05, "loss": 0.8417, "step": 188320 }, { "epoch": 3.306413385066451, "grad_norm": 0.05555471820821586, "learning_rate": 5.484920336855882e-05, "loss": 0.8449, "step": 188330 }, { "epoch": 3.3065889499464527, "grad_norm": 0.06159032520665692, "learning_rate": 5.483934494107472e-05, "loss": 0.8424, "step": 188340 }, { "epoch": 3.306764514826454, "grad_norm": 0.15324502131012205, "learning_rate": 5.4829487098454954e-05, "loss": 0.8476, "step": 188350 }, { "epoch": 3.3069400797064556, "grad_norm": 0.04583010517474038, "learning_rate": 5.481962984082445e-05, "loss": 0.8462, "step": 188360 }, { "epoch": 3.3071156445864567, "grad_norm": 0.08822450243125395, "learning_rate": 5.480977316830812e-05, "loss": 0.8474, "step": 188370 }, { "epoch": 3.307291209466458, "grad_norm": 0.04316264767426342, "learning_rate": 5.479991708103076e-05, "loss": 0.8522, "step": 188380 }, { "epoch": 3.3074667743464596, "grad_norm": 0.06403100813518539, "learning_rate": 5.479006157911732e-05, "loss": 0.8424, "step": 188390 }, { "epoch": 3.307642339226461, "grad_norm": 0.047957866186606246, "learning_rate": 5.4780206662692704e-05, "loss": 0.85, "step": 188400 }, { "epoch": 3.3078179041064626, "grad_norm": 0.05718167784153297, "learning_rate": 5.4770352331881666e-05, "loss": 0.8429, "step": 188410 }, { "epoch": 3.307993468986464, "grad_norm": 0.052272537413278336, "learning_rate": 5.476049858680916e-05, "loss": 0.843, "step": 188420 }, { "epoch": 3.3081690338664655, "grad_norm": 0.05953647945218844, "learning_rate": 5.4750645427599966e-05, "loss": 0.8431, "step": 188430 }, { "epoch": 3.3083445987464666, "grad_norm": 0.057026947449312436, "learning_rate": 5.474079285437901e-05, "loss": 0.837, "step": 188440 }, { "epoch": 3.308520163626468, "grad_norm": 0.052060077258774366, "learning_rate": 5.473094086727103e-05, "loss": 0.8421, "step": 188450 }, { "epoch": 3.3086957285064695, "grad_norm": 0.05036635386672244, "learning_rate": 5.47210894664009e-05, "loss": 0.8437, "step": 188460 }, { "epoch": 3.308871293386471, "grad_norm": 0.05032288033514849, "learning_rate": 5.471123865189348e-05, "loss": 0.8388, "step": 188470 }, { "epoch": 3.3090468582664725, "grad_norm": 0.051495101115482944, "learning_rate": 5.470138842387348e-05, "loss": 0.839, "step": 188480 }, { "epoch": 3.309222423146474, "grad_norm": 0.06676249421004482, "learning_rate": 5.469153878246579e-05, "loss": 0.8432, "step": 188490 }, { "epoch": 3.309397988026475, "grad_norm": 0.08454936650951901, "learning_rate": 5.4681689727795196e-05, "loss": 0.8458, "step": 188500 }, { "epoch": 3.3095735529064765, "grad_norm": 0.06627754149352143, "learning_rate": 5.4671841259986436e-05, "loss": 0.8415, "step": 188510 }, { "epoch": 3.309749117786478, "grad_norm": 0.056798327583904185, "learning_rate": 5.4661993379164324e-05, "loss": 0.8393, "step": 188520 }, { "epoch": 3.3099246826664794, "grad_norm": 0.056225789135752526, "learning_rate": 5.4652146085453634e-05, "loss": 0.8352, "step": 188530 }, { "epoch": 3.310100247546481, "grad_norm": 0.053054641897268696, "learning_rate": 5.464229937897918e-05, "loss": 0.8416, "step": 188540 }, { "epoch": 3.3102758124264824, "grad_norm": 0.04578637421801843, "learning_rate": 5.463245325986564e-05, "loss": 0.846, "step": 188550 }, { "epoch": 3.3104513773064834, "grad_norm": 0.05618874778033527, "learning_rate": 5.4622607728237796e-05, "loss": 0.8358, "step": 188560 }, { "epoch": 3.310626942186485, "grad_norm": 0.045572131678131636, "learning_rate": 5.461276278422044e-05, "loss": 0.8454, "step": 188570 }, { "epoch": 3.3108025070664864, "grad_norm": 0.062378806211410905, "learning_rate": 5.4602918427938244e-05, "loss": 0.8431, "step": 188580 }, { "epoch": 3.310978071946488, "grad_norm": 0.05486832935237594, "learning_rate": 5.459307465951595e-05, "loss": 0.8297, "step": 188590 }, { "epoch": 3.3111536368264893, "grad_norm": 0.04182835393099748, "learning_rate": 5.45832314790783e-05, "loss": 0.8493, "step": 188600 }, { "epoch": 3.311329201706491, "grad_norm": 0.050063225082640224, "learning_rate": 5.4573388886750047e-05, "loss": 0.846, "step": 188610 }, { "epoch": 3.3115047665864923, "grad_norm": 0.056759901545790664, "learning_rate": 5.4563546882655805e-05, "loss": 0.8451, "step": 188620 }, { "epoch": 3.3116803314664933, "grad_norm": 0.05808854068190132, "learning_rate": 5.455370546692031e-05, "loss": 0.8369, "step": 188630 }, { "epoch": 3.311855896346495, "grad_norm": 0.0604057319236312, "learning_rate": 5.454386463966833e-05, "loss": 0.8404, "step": 188640 }, { "epoch": 3.3120314612264963, "grad_norm": 0.047283536613036005, "learning_rate": 5.4534024401024444e-05, "loss": 0.8402, "step": 188650 }, { "epoch": 3.3122070261064978, "grad_norm": 0.055530492580010374, "learning_rate": 5.452418475111337e-05, "loss": 0.8367, "step": 188660 }, { "epoch": 3.3123825909864992, "grad_norm": 0.06039194314943583, "learning_rate": 5.451434569005983e-05, "loss": 0.8333, "step": 188670 }, { "epoch": 3.3125581558665003, "grad_norm": 0.05511140426925219, "learning_rate": 5.45045072179884e-05, "loss": 0.8367, "step": 188680 }, { "epoch": 3.3127337207465017, "grad_norm": 0.03725313901020002, "learning_rate": 5.449466933502377e-05, "loss": 0.8391, "step": 188690 }, { "epoch": 3.312909285626503, "grad_norm": 0.05636281408770627, "learning_rate": 5.44848320412906e-05, "loss": 0.8323, "step": 188700 }, { "epoch": 3.3130848505065047, "grad_norm": 0.060125247999110024, "learning_rate": 5.447499533691354e-05, "loss": 0.8388, "step": 188710 }, { "epoch": 3.313260415386506, "grad_norm": 0.05268548256624648, "learning_rate": 5.4465159222017184e-05, "loss": 0.8456, "step": 188720 }, { "epoch": 3.3134359802665077, "grad_norm": 0.04131191914627482, "learning_rate": 5.445532369672617e-05, "loss": 0.8545, "step": 188730 }, { "epoch": 3.313611545146509, "grad_norm": 0.048795539056496945, "learning_rate": 5.444548876116518e-05, "loss": 0.8394, "step": 188740 }, { "epoch": 3.31378711002651, "grad_norm": 0.058874618046789125, "learning_rate": 5.4435654415458716e-05, "loss": 0.8359, "step": 188750 }, { "epoch": 3.3139626749065116, "grad_norm": 0.04601977044907312, "learning_rate": 5.442582065973143e-05, "loss": 0.8379, "step": 188760 }, { "epoch": 3.314138239786513, "grad_norm": 0.05417863926246087, "learning_rate": 5.4415987494107935e-05, "loss": 0.8415, "step": 188770 }, { "epoch": 3.3143138046665146, "grad_norm": 0.05493349372701165, "learning_rate": 5.4406154918712834e-05, "loss": 0.8371, "step": 188780 }, { "epoch": 3.314489369546516, "grad_norm": 0.06620077437303802, "learning_rate": 5.439632293367065e-05, "loss": 0.8477, "step": 188790 }, { "epoch": 3.314664934426517, "grad_norm": 0.04750718154760229, "learning_rate": 5.4386491539105985e-05, "loss": 0.8383, "step": 188800 }, { "epoch": 3.3148404993065186, "grad_norm": 0.059073424154455455, "learning_rate": 5.437666073514345e-05, "loss": 0.8318, "step": 188810 }, { "epoch": 3.31501606418652, "grad_norm": 0.05709885731914001, "learning_rate": 5.436683052190751e-05, "loss": 0.8415, "step": 188820 }, { "epoch": 3.3151916290665215, "grad_norm": 0.059300552819550545, "learning_rate": 5.435700089952277e-05, "loss": 0.8412, "step": 188830 }, { "epoch": 3.315367193946523, "grad_norm": 0.061249559101184375, "learning_rate": 5.434717186811379e-05, "loss": 0.8473, "step": 188840 }, { "epoch": 3.3155427588265245, "grad_norm": 0.06903902508222046, "learning_rate": 5.4337343427805066e-05, "loss": 0.8364, "step": 188850 }, { "epoch": 3.315718323706526, "grad_norm": 0.04797787417009455, "learning_rate": 5.432751557872113e-05, "loss": 0.8481, "step": 188860 }, { "epoch": 3.315893888586527, "grad_norm": 0.07498501927355607, "learning_rate": 5.431768832098653e-05, "loss": 0.8358, "step": 188870 }, { "epoch": 3.3160694534665285, "grad_norm": 0.04884192028284671, "learning_rate": 5.43078616547258e-05, "loss": 0.8454, "step": 188880 }, { "epoch": 3.31624501834653, "grad_norm": 0.06717861959420066, "learning_rate": 5.4298035580063365e-05, "loss": 0.8451, "step": 188890 }, { "epoch": 3.3164205832265314, "grad_norm": 0.0527923751689476, "learning_rate": 5.428821009712377e-05, "loss": 0.8407, "step": 188900 }, { "epoch": 3.316596148106533, "grad_norm": 0.050328404856147084, "learning_rate": 5.4278385206031546e-05, "loss": 0.8346, "step": 188910 }, { "epoch": 3.316771712986534, "grad_norm": 0.08458518255591137, "learning_rate": 5.4268560906911104e-05, "loss": 0.844, "step": 188920 }, { "epoch": 3.3169472778665354, "grad_norm": 0.04848740103133456, "learning_rate": 5.4258737199886945e-05, "loss": 0.8371, "step": 188930 }, { "epoch": 3.317122842746537, "grad_norm": 0.06572282645962602, "learning_rate": 5.4248914085083545e-05, "loss": 0.8438, "step": 188940 }, { "epoch": 3.3172984076265384, "grad_norm": 0.0654673482313044, "learning_rate": 5.42390915626254e-05, "loss": 0.8368, "step": 188950 }, { "epoch": 3.31747397250654, "grad_norm": 0.0421083466636067, "learning_rate": 5.4229269632636895e-05, "loss": 0.8348, "step": 188960 }, { "epoch": 3.3176495373865413, "grad_norm": 0.053279282748816874, "learning_rate": 5.421944829524249e-05, "loss": 0.8384, "step": 188970 }, { "epoch": 3.317825102266543, "grad_norm": 0.05817711172375571, "learning_rate": 5.42096275505667e-05, "loss": 0.8372, "step": 188980 }, { "epoch": 3.318000667146544, "grad_norm": 0.05513395960740993, "learning_rate": 5.419980739873385e-05, "loss": 0.8461, "step": 188990 }, { "epoch": 3.3181762320265453, "grad_norm": 0.04824082898075808, "learning_rate": 5.41899878398684e-05, "loss": 0.8494, "step": 189000 }, { "epoch": 3.318351796906547, "grad_norm": 0.048454872104198556, "learning_rate": 5.418016887409483e-05, "loss": 0.834, "step": 189010 }, { "epoch": 3.3185273617865483, "grad_norm": 0.08192917005410971, "learning_rate": 5.417035050153744e-05, "loss": 0.8373, "step": 189020 }, { "epoch": 3.3187029266665498, "grad_norm": 0.06369356676404798, "learning_rate": 5.41605327223207e-05, "loss": 0.8491, "step": 189030 }, { "epoch": 3.318878491546551, "grad_norm": 0.06513602748356759, "learning_rate": 5.4150715536568976e-05, "loss": 0.8421, "step": 189040 }, { "epoch": 3.3190540564265523, "grad_norm": 0.054113222621474506, "learning_rate": 5.414089894440669e-05, "loss": 0.8362, "step": 189050 }, { "epoch": 3.3192296213065537, "grad_norm": 0.053822506781832924, "learning_rate": 5.413108294595818e-05, "loss": 0.8379, "step": 189060 }, { "epoch": 3.3194051861865552, "grad_norm": 0.05543233504069285, "learning_rate": 5.4121267541347806e-05, "loss": 0.8426, "step": 189070 }, { "epoch": 3.3195807510665567, "grad_norm": 0.0533414617152652, "learning_rate": 5.411145273070003e-05, "loss": 0.8373, "step": 189080 }, { "epoch": 3.319756315946558, "grad_norm": 0.07302750529827116, "learning_rate": 5.4101638514139086e-05, "loss": 0.8459, "step": 189090 }, { "epoch": 3.3199318808265597, "grad_norm": 0.06545672586754138, "learning_rate": 5.409182489178936e-05, "loss": 0.8471, "step": 189100 }, { "epoch": 3.3201074457065607, "grad_norm": 0.06358831117921779, "learning_rate": 5.408201186377521e-05, "loss": 0.8442, "step": 189110 }, { "epoch": 3.320283010586562, "grad_norm": 0.04446808951434463, "learning_rate": 5.407219943022098e-05, "loss": 0.8412, "step": 189120 }, { "epoch": 3.3204585754665636, "grad_norm": 0.04170255084503563, "learning_rate": 5.406238759125096e-05, "loss": 0.8541, "step": 189130 }, { "epoch": 3.320634140346565, "grad_norm": 0.058720881702952525, "learning_rate": 5.405257634698948e-05, "loss": 0.8473, "step": 189140 }, { "epoch": 3.3208097052265666, "grad_norm": 0.04900992965230126, "learning_rate": 5.404276569756088e-05, "loss": 0.8346, "step": 189150 }, { "epoch": 3.3209852701065676, "grad_norm": 0.051157792139762584, "learning_rate": 5.403295564308941e-05, "loss": 0.8376, "step": 189160 }, { "epoch": 3.321160834986569, "grad_norm": 0.058274119978565235, "learning_rate": 5.402314618369939e-05, "loss": 0.8378, "step": 189170 }, { "epoch": 3.3213363998665706, "grad_norm": 0.056457136316671576, "learning_rate": 5.4013337319515154e-05, "loss": 0.8406, "step": 189180 }, { "epoch": 3.321511964746572, "grad_norm": 0.05123784757207901, "learning_rate": 5.40035290506609e-05, "loss": 0.8449, "step": 189190 }, { "epoch": 3.3216875296265735, "grad_norm": 0.061720709450122285, "learning_rate": 5.399372137726093e-05, "loss": 0.8339, "step": 189200 }, { "epoch": 3.321863094506575, "grad_norm": 0.06595297885761715, "learning_rate": 5.3983914299439545e-05, "loss": 0.8342, "step": 189210 }, { "epoch": 3.3220386593865765, "grad_norm": 0.053472812420902675, "learning_rate": 5.3974107817320984e-05, "loss": 0.8426, "step": 189220 }, { "epoch": 3.3222142242665775, "grad_norm": 0.05603873009800822, "learning_rate": 5.396430193102947e-05, "loss": 0.847, "step": 189230 }, { "epoch": 3.322389789146579, "grad_norm": 0.06136008889119566, "learning_rate": 5.3954496640689266e-05, "loss": 0.8434, "step": 189240 }, { "epoch": 3.3225653540265805, "grad_norm": 0.06339201948613171, "learning_rate": 5.394469194642464e-05, "loss": 0.8331, "step": 189250 }, { "epoch": 3.322740918906582, "grad_norm": 0.055206687958636216, "learning_rate": 5.393488784835974e-05, "loss": 0.8441, "step": 189260 }, { "epoch": 3.3229164837865834, "grad_norm": 0.05963300779271633, "learning_rate": 5.392508434661884e-05, "loss": 0.8421, "step": 189270 }, { "epoch": 3.323092048666585, "grad_norm": 0.05399994955703744, "learning_rate": 5.391528144132614e-05, "loss": 0.8509, "step": 189280 }, { "epoch": 3.323267613546586, "grad_norm": 0.052015646258120316, "learning_rate": 5.390547913260587e-05, "loss": 0.8372, "step": 189290 }, { "epoch": 3.3234431784265874, "grad_norm": 0.051091587337023375, "learning_rate": 5.389567742058218e-05, "loss": 0.8364, "step": 189300 }, { "epoch": 3.323618743306589, "grad_norm": 0.04287546359278542, "learning_rate": 5.3885876305379304e-05, "loss": 0.8295, "step": 189310 }, { "epoch": 3.3237943081865904, "grad_norm": 0.07410245842997132, "learning_rate": 5.3876075787121415e-05, "loss": 0.8401, "step": 189320 }, { "epoch": 3.323969873066592, "grad_norm": 0.054410826188134094, "learning_rate": 5.3866275865932646e-05, "loss": 0.8394, "step": 189330 }, { "epoch": 3.3241454379465933, "grad_norm": 0.052568098634607235, "learning_rate": 5.3856476541937205e-05, "loss": 0.8424, "step": 189340 }, { "epoch": 3.3243210028265944, "grad_norm": 0.04291832350882859, "learning_rate": 5.3846677815259275e-05, "loss": 0.8523, "step": 189350 }, { "epoch": 3.324496567706596, "grad_norm": 0.05405553118128843, "learning_rate": 5.3836879686022937e-05, "loss": 0.8342, "step": 189360 }, { "epoch": 3.3246721325865973, "grad_norm": 0.061975073643158796, "learning_rate": 5.382708215435236e-05, "loss": 0.8404, "step": 189370 }, { "epoch": 3.324847697466599, "grad_norm": 0.05529494695958789, "learning_rate": 5.3817285220371704e-05, "loss": 0.8399, "step": 189380 }, { "epoch": 3.3250232623466003, "grad_norm": 0.05661619103709557, "learning_rate": 5.380748888420513e-05, "loss": 0.8337, "step": 189390 }, { "epoch": 3.3251988272266018, "grad_norm": 0.04571081633459759, "learning_rate": 5.379769314597667e-05, "loss": 0.8433, "step": 189400 }, { "epoch": 3.3253743921066032, "grad_norm": 0.052782579383291645, "learning_rate": 5.378789800581048e-05, "loss": 0.8361, "step": 189410 }, { "epoch": 3.3255499569866043, "grad_norm": 0.060984903364383856, "learning_rate": 5.377810346383071e-05, "loss": 0.8326, "step": 189420 }, { "epoch": 3.3257255218666057, "grad_norm": 0.0474245269928329, "learning_rate": 5.3768309520161384e-05, "loss": 0.8489, "step": 189430 }, { "epoch": 3.3259010867466072, "grad_norm": 0.049169609492749816, "learning_rate": 5.375851617492662e-05, "loss": 0.8475, "step": 189440 }, { "epoch": 3.3260766516266087, "grad_norm": 0.06616896023461705, "learning_rate": 5.3748723428250526e-05, "loss": 0.8426, "step": 189450 }, { "epoch": 3.32625221650661, "grad_norm": 0.07214393967453883, "learning_rate": 5.373893128025719e-05, "loss": 0.8418, "step": 189460 }, { "epoch": 3.326427781386611, "grad_norm": 0.04828367173674802, "learning_rate": 5.3729139731070613e-05, "loss": 0.844, "step": 189470 }, { "epoch": 3.3266033462666127, "grad_norm": 0.05187869822466638, "learning_rate": 5.371934878081489e-05, "loss": 0.8376, "step": 189480 }, { "epoch": 3.326778911146614, "grad_norm": 0.08937429387272719, "learning_rate": 5.370955842961414e-05, "loss": 0.8441, "step": 189490 }, { "epoch": 3.3269544760266156, "grad_norm": 0.04589367674415919, "learning_rate": 5.369976867759229e-05, "loss": 0.8357, "step": 189500 }, { "epoch": 3.327130040906617, "grad_norm": 0.05881932738410323, "learning_rate": 5.3689979524873444e-05, "loss": 0.8347, "step": 189510 }, { "epoch": 3.3273056057866186, "grad_norm": 0.07506330402387329, "learning_rate": 5.3680190971581655e-05, "loss": 0.8417, "step": 189520 }, { "epoch": 3.32748117066662, "grad_norm": 0.05809387842191893, "learning_rate": 5.367040301784089e-05, "loss": 0.8464, "step": 189530 }, { "epoch": 3.327656735546621, "grad_norm": 0.051913550978840706, "learning_rate": 5.3660615663775174e-05, "loss": 0.8353, "step": 189540 }, { "epoch": 3.3278323004266226, "grad_norm": 0.058541221617859435, "learning_rate": 5.365082890950855e-05, "loss": 0.8374, "step": 189550 }, { "epoch": 3.328007865306624, "grad_norm": 0.04991979491652878, "learning_rate": 5.364104275516502e-05, "loss": 0.8474, "step": 189560 }, { "epoch": 3.3281834301866255, "grad_norm": 0.050092345538252155, "learning_rate": 5.3631257200868515e-05, "loss": 0.8376, "step": 189570 }, { "epoch": 3.328358995066627, "grad_norm": 0.05093500833387768, "learning_rate": 5.362147224674307e-05, "loss": 0.8382, "step": 189580 }, { "epoch": 3.328534559946628, "grad_norm": 0.04623444043714597, "learning_rate": 5.3611687892912675e-05, "loss": 0.8368, "step": 189590 }, { "epoch": 3.3287101248266295, "grad_norm": 0.05594380336900717, "learning_rate": 5.360190413950125e-05, "loss": 0.8477, "step": 189600 }, { "epoch": 3.328885689706631, "grad_norm": 0.055031810040797866, "learning_rate": 5.359212098663278e-05, "loss": 0.8341, "step": 189610 }, { "epoch": 3.3290612545866325, "grad_norm": 0.0633188913679895, "learning_rate": 5.358233843443121e-05, "loss": 0.8402, "step": 189620 }, { "epoch": 3.329236819466634, "grad_norm": 0.042233698634467944, "learning_rate": 5.357255648302056e-05, "loss": 0.8364, "step": 189630 }, { "epoch": 3.3294123843466354, "grad_norm": 0.058998416770341344, "learning_rate": 5.356277513252467e-05, "loss": 0.8445, "step": 189640 }, { "epoch": 3.329587949226637, "grad_norm": 0.05074396786073412, "learning_rate": 5.355299438306749e-05, "loss": 0.842, "step": 189650 }, { "epoch": 3.329763514106638, "grad_norm": 0.0515964670616467, "learning_rate": 5.354321423477302e-05, "loss": 0.8369, "step": 189660 }, { "epoch": 3.3299390789866394, "grad_norm": 0.08425231340152768, "learning_rate": 5.353343468776508e-05, "loss": 0.8445, "step": 189670 }, { "epoch": 3.330114643866641, "grad_norm": 0.05592324098765087, "learning_rate": 5.352365574216762e-05, "loss": 0.8419, "step": 189680 }, { "epoch": 3.3302902087466424, "grad_norm": 0.05881557292760229, "learning_rate": 5.351387739810457e-05, "loss": 0.8405, "step": 189690 }, { "epoch": 3.330465773626644, "grad_norm": 0.15209186171572872, "learning_rate": 5.350409965569977e-05, "loss": 0.8361, "step": 189700 }, { "epoch": 3.330641338506645, "grad_norm": 0.05342823258494497, "learning_rate": 5.349432251507711e-05, "loss": 0.8389, "step": 189710 }, { "epoch": 3.3308169033866464, "grad_norm": 0.054560227889959505, "learning_rate": 5.34845459763605e-05, "loss": 0.841, "step": 189720 }, { "epoch": 3.330992468266648, "grad_norm": 0.047304482433185885, "learning_rate": 5.347477003967385e-05, "loss": 0.8407, "step": 189730 }, { "epoch": 3.3311680331466493, "grad_norm": 0.07737420694291107, "learning_rate": 5.3464994705140916e-05, "loss": 0.8435, "step": 189740 }, { "epoch": 3.331343598026651, "grad_norm": 0.0489716429813751, "learning_rate": 5.3455219972885605e-05, "loss": 0.8382, "step": 189750 }, { "epoch": 3.3315191629066523, "grad_norm": 0.060309394509053676, "learning_rate": 5.3445445843031826e-05, "loss": 0.839, "step": 189760 }, { "epoch": 3.3316947277866538, "grad_norm": 0.06146597429660073, "learning_rate": 5.343567231570331e-05, "loss": 0.8488, "step": 189770 }, { "epoch": 3.331870292666655, "grad_norm": 0.052332608740482706, "learning_rate": 5.3425899391023934e-05, "loss": 0.8452, "step": 189780 }, { "epoch": 3.3320458575466563, "grad_norm": 0.052166687694256954, "learning_rate": 5.341612706911754e-05, "loss": 0.838, "step": 189790 }, { "epoch": 3.3322214224266578, "grad_norm": 0.058188840410620425, "learning_rate": 5.3406355350107985e-05, "loss": 0.8313, "step": 189800 }, { "epoch": 3.3323969873066592, "grad_norm": 0.05235209430918068, "learning_rate": 5.339658423411897e-05, "loss": 0.8392, "step": 189810 }, { "epoch": 3.3325725521866607, "grad_norm": 0.04241793755208619, "learning_rate": 5.338681372127436e-05, "loss": 0.8434, "step": 189820 }, { "epoch": 3.3327481170666617, "grad_norm": 0.06655877848287461, "learning_rate": 5.337704381169799e-05, "loss": 0.8432, "step": 189830 }, { "epoch": 3.332923681946663, "grad_norm": 0.07108427566079556, "learning_rate": 5.336727450551357e-05, "loss": 0.8357, "step": 189840 }, { "epoch": 3.3330992468266647, "grad_norm": 0.04987518463202046, "learning_rate": 5.335750580284491e-05, "loss": 0.8448, "step": 189850 }, { "epoch": 3.333274811706666, "grad_norm": 0.04154132645543485, "learning_rate": 5.3347737703815816e-05, "loss": 0.8419, "step": 189860 }, { "epoch": 3.3334503765866677, "grad_norm": 0.08962594739910905, "learning_rate": 5.333797020854997e-05, "loss": 0.8463, "step": 189870 }, { "epoch": 3.333625941466669, "grad_norm": 0.05071444583396208, "learning_rate": 5.332820331717118e-05, "loss": 0.8462, "step": 189880 }, { "epoch": 3.3338015063466706, "grad_norm": 0.04588064239476597, "learning_rate": 5.3318437029803195e-05, "loss": 0.8382, "step": 189890 }, { "epoch": 3.3339770712266716, "grad_norm": 0.04863125218759956, "learning_rate": 5.330867134656979e-05, "loss": 0.8483, "step": 189900 }, { "epoch": 3.334152636106673, "grad_norm": 0.05696931732027708, "learning_rate": 5.329890626759462e-05, "loss": 0.8483, "step": 189910 }, { "epoch": 3.3343282009866746, "grad_norm": 0.07352158734629959, "learning_rate": 5.328914179300145e-05, "loss": 0.8408, "step": 189920 }, { "epoch": 3.334503765866676, "grad_norm": 0.05071599183702518, "learning_rate": 5.327937792291403e-05, "loss": 0.8404, "step": 189930 }, { "epoch": 3.3346793307466776, "grad_norm": 0.05133499665837229, "learning_rate": 5.326961465745601e-05, "loss": 0.842, "step": 189940 }, { "epoch": 3.334854895626679, "grad_norm": 0.051371254314752535, "learning_rate": 5.325985199675111e-05, "loss": 0.8394, "step": 189950 }, { "epoch": 3.33503046050668, "grad_norm": 0.08268500224919204, "learning_rate": 5.325008994092303e-05, "loss": 0.8463, "step": 189960 }, { "epoch": 3.3352060253866815, "grad_norm": 0.04446796982015695, "learning_rate": 5.3240328490095515e-05, "loss": 0.8443, "step": 189970 }, { "epoch": 3.335381590266683, "grad_norm": 0.061943274937434026, "learning_rate": 5.323056764439214e-05, "loss": 0.8412, "step": 189980 }, { "epoch": 3.3355571551466845, "grad_norm": 0.05319647359601944, "learning_rate": 5.3220807403936626e-05, "loss": 0.8494, "step": 189990 }, { "epoch": 3.335732720026686, "grad_norm": 0.05282210511550332, "learning_rate": 5.32110477688527e-05, "loss": 0.8363, "step": 190000 }, { "epoch": 3.3359082849066874, "grad_norm": 0.046447780115372594, "learning_rate": 5.32012887392639e-05, "loss": 0.838, "step": 190010 }, { "epoch": 3.3360838497866885, "grad_norm": 0.04729651221073054, "learning_rate": 5.3191530315293936e-05, "loss": 0.8415, "step": 190020 }, { "epoch": 3.33625941466669, "grad_norm": 0.04726186868815279, "learning_rate": 5.318177249706648e-05, "loss": 0.841, "step": 190030 }, { "epoch": 3.3364349795466914, "grad_norm": 0.0476636587605162, "learning_rate": 5.31720152847051e-05, "loss": 0.8458, "step": 190040 }, { "epoch": 3.336610544426693, "grad_norm": 0.045243972266877625, "learning_rate": 5.316225867833344e-05, "loss": 0.8417, "step": 190050 }, { "epoch": 3.3367861093066944, "grad_norm": 0.056886256271409204, "learning_rate": 5.315250267807513e-05, "loss": 0.839, "step": 190060 }, { "epoch": 3.336961674186696, "grad_norm": 0.04747029295674506, "learning_rate": 5.314274728405384e-05, "loss": 0.8388, "step": 190070 }, { "epoch": 3.3371372390666973, "grad_norm": 0.0670165965096357, "learning_rate": 5.3132992496393067e-05, "loss": 0.8449, "step": 190080 }, { "epoch": 3.3373128039466984, "grad_norm": 0.058131009093983256, "learning_rate": 5.312323831521644e-05, "loss": 0.8451, "step": 190090 }, { "epoch": 3.3374883688267, "grad_norm": 0.05304944685507681, "learning_rate": 5.311348474064761e-05, "loss": 0.8461, "step": 190100 }, { "epoch": 3.3376639337067013, "grad_norm": 0.04787622415486119, "learning_rate": 5.3103731772810075e-05, "loss": 0.8403, "step": 190110 }, { "epoch": 3.337839498586703, "grad_norm": 0.0583091607691032, "learning_rate": 5.309397941182741e-05, "loss": 0.8431, "step": 190120 }, { "epoch": 3.3380150634667043, "grad_norm": 0.038664385069156586, "learning_rate": 5.3084227657823225e-05, "loss": 0.8401, "step": 190130 }, { "epoch": 3.3381906283467053, "grad_norm": 0.04429642436867699, "learning_rate": 5.30744765109211e-05, "loss": 0.8447, "step": 190140 }, { "epoch": 3.338366193226707, "grad_norm": 0.04546357881050222, "learning_rate": 5.30647259712445e-05, "loss": 0.8407, "step": 190150 }, { "epoch": 3.3385417581067083, "grad_norm": 0.04580341044081266, "learning_rate": 5.3054976038917004e-05, "loss": 0.8403, "step": 190160 }, { "epoch": 3.3387173229867098, "grad_norm": 0.07604852658180039, "learning_rate": 5.30452267140622e-05, "loss": 0.8401, "step": 190170 }, { "epoch": 3.3388928878667112, "grad_norm": 0.05110613396996473, "learning_rate": 5.3035477996803505e-05, "loss": 0.8416, "step": 190180 }, { "epoch": 3.3390684527467127, "grad_norm": 0.05576829332538855, "learning_rate": 5.302572988726452e-05, "loss": 0.8439, "step": 190190 }, { "epoch": 3.339244017626714, "grad_norm": 0.046203485194732635, "learning_rate": 5.301598238556876e-05, "loss": 0.8362, "step": 190200 }, { "epoch": 3.3394195825067152, "grad_norm": 0.05018518535401116, "learning_rate": 5.300623549183966e-05, "loss": 0.851, "step": 190210 }, { "epoch": 3.3395951473867167, "grad_norm": 0.04505992159164537, "learning_rate": 5.299648920620076e-05, "loss": 0.846, "step": 190220 }, { "epoch": 3.339770712266718, "grad_norm": 0.038548419386683595, "learning_rate": 5.298674352877554e-05, "loss": 0.852, "step": 190230 }, { "epoch": 3.3399462771467197, "grad_norm": 0.08414073355293826, "learning_rate": 5.297699845968751e-05, "loss": 0.8421, "step": 190240 }, { "epoch": 3.340121842026721, "grad_norm": 0.05143087740026186, "learning_rate": 5.296725399906008e-05, "loss": 0.8419, "step": 190250 }, { "epoch": 3.340297406906722, "grad_norm": 0.048932662743289876, "learning_rate": 5.2957510147016754e-05, "loss": 0.846, "step": 190260 }, { "epoch": 3.3404729717867236, "grad_norm": 0.05859023747864608, "learning_rate": 5.294776690368101e-05, "loss": 0.8438, "step": 190270 }, { "epoch": 3.340648536666725, "grad_norm": 0.045353086826743216, "learning_rate": 5.2938024269176235e-05, "loss": 0.8409, "step": 190280 }, { "epoch": 3.3408241015467266, "grad_norm": 0.06091381668762484, "learning_rate": 5.2928282243625903e-05, "loss": 0.8379, "step": 190290 }, { "epoch": 3.340999666426728, "grad_norm": 0.04438870252642902, "learning_rate": 5.291854082715344e-05, "loss": 0.84, "step": 190300 }, { "epoch": 3.3411752313067296, "grad_norm": 0.04228208870922415, "learning_rate": 5.2908800019882334e-05, "loss": 0.8494, "step": 190310 }, { "epoch": 3.341350796186731, "grad_norm": 0.05961112335370485, "learning_rate": 5.28990598219359e-05, "loss": 0.8468, "step": 190320 }, { "epoch": 3.341526361066732, "grad_norm": 0.04534522105995815, "learning_rate": 5.28893202334376e-05, "loss": 0.8432, "step": 190330 }, { "epoch": 3.3417019259467335, "grad_norm": 0.05820723112193159, "learning_rate": 5.2879581254510865e-05, "loss": 0.8458, "step": 190340 }, { "epoch": 3.341877490826735, "grad_norm": 0.05112437194470751, "learning_rate": 5.286984288527903e-05, "loss": 0.8422, "step": 190350 }, { "epoch": 3.3420530557067365, "grad_norm": 0.0506952051762276, "learning_rate": 5.286010512586549e-05, "loss": 0.8418, "step": 190360 }, { "epoch": 3.342228620586738, "grad_norm": 0.04695314931791569, "learning_rate": 5.28503679763937e-05, "loss": 0.8353, "step": 190370 }, { "epoch": 3.342404185466739, "grad_norm": 0.05116800971079988, "learning_rate": 5.284063143698693e-05, "loss": 0.8442, "step": 190380 }, { "epoch": 3.3425797503467405, "grad_norm": 0.06382008732688994, "learning_rate": 5.2830895507768594e-05, "loss": 0.8379, "step": 190390 }, { "epoch": 3.342755315226742, "grad_norm": 0.05178294238931887, "learning_rate": 5.282116018886204e-05, "loss": 0.8364, "step": 190400 }, { "epoch": 3.3429308801067434, "grad_norm": 0.0470718146503585, "learning_rate": 5.2811425480390666e-05, "loss": 0.8375, "step": 190410 }, { "epoch": 3.343106444986745, "grad_norm": 0.04482021346731024, "learning_rate": 5.280169138247772e-05, "loss": 0.838, "step": 190420 }, { "epoch": 3.3432820098667464, "grad_norm": 0.05917915432872709, "learning_rate": 5.279195789524659e-05, "loss": 0.8342, "step": 190430 }, { "epoch": 3.343457574746748, "grad_norm": 0.06094321028442564, "learning_rate": 5.278222501882062e-05, "loss": 0.842, "step": 190440 }, { "epoch": 3.343633139626749, "grad_norm": 0.05343697851931588, "learning_rate": 5.2772492753323065e-05, "loss": 0.8438, "step": 190450 }, { "epoch": 3.3438087045067504, "grad_norm": 0.08207301718168192, "learning_rate": 5.2762761098877275e-05, "loss": 0.8436, "step": 190460 }, { "epoch": 3.343984269386752, "grad_norm": 0.04615646457066099, "learning_rate": 5.2753030055606544e-05, "loss": 0.838, "step": 190470 }, { "epoch": 3.3441598342667533, "grad_norm": 0.050337120050456645, "learning_rate": 5.274329962363421e-05, "loss": 0.8414, "step": 190480 }, { "epoch": 3.344335399146755, "grad_norm": 0.05075653627797945, "learning_rate": 5.273356980308347e-05, "loss": 0.8405, "step": 190490 }, { "epoch": 3.344510964026756, "grad_norm": 0.05324663515433989, "learning_rate": 5.272384059407766e-05, "loss": 0.8454, "step": 190500 }, { "epoch": 3.3446865289067573, "grad_norm": 0.04708662835511205, "learning_rate": 5.2714111996740065e-05, "loss": 0.846, "step": 190510 }, { "epoch": 3.344862093786759, "grad_norm": 0.04709992750829385, "learning_rate": 5.2704384011193906e-05, "loss": 0.8441, "step": 190520 }, { "epoch": 3.3450376586667603, "grad_norm": 0.05859445984691485, "learning_rate": 5.269465663756245e-05, "loss": 0.8367, "step": 190530 }, { "epoch": 3.3452132235467618, "grad_norm": 0.0498424374163307, "learning_rate": 5.268492987596899e-05, "loss": 0.8294, "step": 190540 }, { "epoch": 3.3453887884267632, "grad_norm": 0.04690352600700953, "learning_rate": 5.2675203726536685e-05, "loss": 0.8429, "step": 190550 }, { "epoch": 3.3455643533067647, "grad_norm": 0.04527921221620908, "learning_rate": 5.266547818938881e-05, "loss": 0.8357, "step": 190560 }, { "epoch": 3.3457399181867657, "grad_norm": 0.05587445940761521, "learning_rate": 5.26557532646486e-05, "loss": 0.8392, "step": 190570 }, { "epoch": 3.3459154830667672, "grad_norm": 0.043927965482848955, "learning_rate": 5.2646028952439286e-05, "loss": 0.8311, "step": 190580 }, { "epoch": 3.3460910479467687, "grad_norm": 0.05227686305626216, "learning_rate": 5.263630525288402e-05, "loss": 0.8421, "step": 190590 }, { "epoch": 3.34626661282677, "grad_norm": 0.06055686577473273, "learning_rate": 5.262658216610604e-05, "loss": 0.8376, "step": 190600 }, { "epoch": 3.3464421777067717, "grad_norm": 0.06115875606685929, "learning_rate": 5.261685969222857e-05, "loss": 0.8436, "step": 190610 }, { "epoch": 3.3466177425867727, "grad_norm": 0.06754793487751942, "learning_rate": 5.260713783137472e-05, "loss": 0.8348, "step": 190620 }, { "epoch": 3.346793307466774, "grad_norm": 0.04572822425898899, "learning_rate": 5.259741658366771e-05, "loss": 0.8422, "step": 190630 }, { "epoch": 3.3469688723467756, "grad_norm": 0.05122997139251615, "learning_rate": 5.258769594923071e-05, "loss": 0.8448, "step": 190640 }, { "epoch": 3.347144437226777, "grad_norm": 0.04974123550129526, "learning_rate": 5.257797592818692e-05, "loss": 0.8461, "step": 190650 }, { "epoch": 3.3473200021067786, "grad_norm": 0.07171226343235383, "learning_rate": 5.256825652065941e-05, "loss": 0.8388, "step": 190660 }, { "epoch": 3.34749556698678, "grad_norm": 0.04747523197594043, "learning_rate": 5.2558537726771375e-05, "loss": 0.8437, "step": 190670 }, { "epoch": 3.3476711318667816, "grad_norm": 0.062398389308279845, "learning_rate": 5.2548819546645987e-05, "loss": 0.8468, "step": 190680 }, { "epoch": 3.3478466967467826, "grad_norm": 0.0494899126738849, "learning_rate": 5.2539101980406304e-05, "loss": 0.8414, "step": 190690 }, { "epoch": 3.348022261626784, "grad_norm": 0.04602008765612902, "learning_rate": 5.2529385028175485e-05, "loss": 0.8503, "step": 190700 }, { "epoch": 3.3481978265067855, "grad_norm": 0.05165299434491655, "learning_rate": 5.2519668690076696e-05, "loss": 0.8362, "step": 190710 }, { "epoch": 3.348373391386787, "grad_norm": 0.06654753053359305, "learning_rate": 5.2509952966232945e-05, "loss": 0.8399, "step": 190720 }, { "epoch": 3.3485489562667885, "grad_norm": 0.07499227863837812, "learning_rate": 5.2500237856767386e-05, "loss": 0.8493, "step": 190730 }, { "epoch": 3.34872452114679, "grad_norm": 0.06075787780505727, "learning_rate": 5.2490523361803115e-05, "loss": 0.8413, "step": 190740 }, { "epoch": 3.348900086026791, "grad_norm": 0.05046717876523233, "learning_rate": 5.2480809481463236e-05, "loss": 0.8397, "step": 190750 }, { "epoch": 3.3490756509067925, "grad_norm": 0.04757993557531884, "learning_rate": 5.2471096215870774e-05, "loss": 0.8454, "step": 190760 }, { "epoch": 3.349251215786794, "grad_norm": 0.05237202352968707, "learning_rate": 5.246138356514881e-05, "loss": 0.8426, "step": 190770 }, { "epoch": 3.3494267806667954, "grad_norm": 0.04914089628240936, "learning_rate": 5.245167152942047e-05, "loss": 0.8418, "step": 190780 }, { "epoch": 3.349602345546797, "grad_norm": 0.04867320329679744, "learning_rate": 5.244196010880872e-05, "loss": 0.8428, "step": 190790 }, { "epoch": 3.3497779104267984, "grad_norm": 0.04848752585469435, "learning_rate": 5.2432249303436634e-05, "loss": 0.8353, "step": 190800 }, { "epoch": 3.3499534753067994, "grad_norm": 0.053336345822012264, "learning_rate": 5.242253911342725e-05, "loss": 0.8352, "step": 190810 }, { "epoch": 3.350129040186801, "grad_norm": 0.04530332534021975, "learning_rate": 5.241282953890365e-05, "loss": 0.8362, "step": 190820 }, { "epoch": 3.3503046050668024, "grad_norm": 0.0446044989266345, "learning_rate": 5.2403120579988776e-05, "loss": 0.8478, "step": 190830 }, { "epoch": 3.350480169946804, "grad_norm": 0.05787222833544451, "learning_rate": 5.239341223680567e-05, "loss": 0.8396, "step": 190840 }, { "epoch": 3.3506557348268053, "grad_norm": 0.05203573819910843, "learning_rate": 5.238370450947739e-05, "loss": 0.8402, "step": 190850 }, { "epoch": 3.350831299706807, "grad_norm": 0.055035385183026114, "learning_rate": 5.237399739812686e-05, "loss": 0.8443, "step": 190860 }, { "epoch": 3.3510068645868083, "grad_norm": 0.046579249913731156, "learning_rate": 5.2364290902877086e-05, "loss": 0.8354, "step": 190870 }, { "epoch": 3.3511824294668093, "grad_norm": 0.055980569446447845, "learning_rate": 5.235458502385112e-05, "loss": 0.8326, "step": 190880 }, { "epoch": 3.351357994346811, "grad_norm": 0.05612311784587836, "learning_rate": 5.2344879761171837e-05, "loss": 0.8435, "step": 190890 }, { "epoch": 3.3515335592268123, "grad_norm": 0.04390777018766173, "learning_rate": 5.233517511496225e-05, "loss": 0.8413, "step": 190900 }, { "epoch": 3.3517091241068138, "grad_norm": 0.049651480332631066, "learning_rate": 5.232547108534533e-05, "loss": 0.836, "step": 190910 }, { "epoch": 3.3518846889868152, "grad_norm": 0.0526098137351421, "learning_rate": 5.231576767244406e-05, "loss": 0.8431, "step": 190920 }, { "epoch": 3.3520602538668163, "grad_norm": 0.0645360501731601, "learning_rate": 5.23060648763813e-05, "loss": 0.8358, "step": 190930 }, { "epoch": 3.3522358187468178, "grad_norm": 0.050731070391274176, "learning_rate": 5.2296362697280035e-05, "loss": 0.8432, "step": 190940 }, { "epoch": 3.3524113836268192, "grad_norm": 0.04626447620341748, "learning_rate": 5.2286661135263215e-05, "loss": 0.8408, "step": 190950 }, { "epoch": 3.3525869485068207, "grad_norm": 0.058071643089364744, "learning_rate": 5.227696019045372e-05, "loss": 0.8398, "step": 190960 }, { "epoch": 3.352762513386822, "grad_norm": 0.045490869173398035, "learning_rate": 5.226725986297445e-05, "loss": 0.8476, "step": 190970 }, { "epoch": 3.3529380782668237, "grad_norm": 0.05018176341260143, "learning_rate": 5.225756015294835e-05, "loss": 0.8457, "step": 190980 }, { "epoch": 3.353113643146825, "grad_norm": 0.04706853521010304, "learning_rate": 5.224786106049834e-05, "loss": 0.8429, "step": 190990 }, { "epoch": 3.353289208026826, "grad_norm": 0.05073340975904758, "learning_rate": 5.223816258574723e-05, "loss": 0.8392, "step": 191000 }, { "epoch": 3.3534647729068277, "grad_norm": 0.05258735933821124, "learning_rate": 5.222846472881795e-05, "loss": 0.8495, "step": 191010 }, { "epoch": 3.353640337786829, "grad_norm": 0.07275318477813661, "learning_rate": 5.221876748983342e-05, "loss": 0.8394, "step": 191020 }, { "epoch": 3.3538159026668306, "grad_norm": 0.051964712825330564, "learning_rate": 5.220907086891639e-05, "loss": 0.8348, "step": 191030 }, { "epoch": 3.353991467546832, "grad_norm": 0.05133741667933899, "learning_rate": 5.219937486618981e-05, "loss": 0.8374, "step": 191040 }, { "epoch": 3.354167032426833, "grad_norm": 0.07298847078727372, "learning_rate": 5.218967948177652e-05, "loss": 0.84, "step": 191050 }, { "epoch": 3.3543425973068346, "grad_norm": 0.05458210620257251, "learning_rate": 5.2179984715799315e-05, "loss": 0.8427, "step": 191060 }, { "epoch": 3.354518162186836, "grad_norm": 0.0460278104520748, "learning_rate": 5.2170290568381066e-05, "loss": 0.8364, "step": 191070 }, { "epoch": 3.3546937270668375, "grad_norm": 0.048206780546615695, "learning_rate": 5.2160597039644586e-05, "loss": 0.8435, "step": 191080 }, { "epoch": 3.354869291946839, "grad_norm": 0.04840532971043061, "learning_rate": 5.215090412971274e-05, "loss": 0.8426, "step": 191090 }, { "epoch": 3.3550448568268405, "grad_norm": 0.05165681815567989, "learning_rate": 5.214121183870827e-05, "loss": 0.8401, "step": 191100 }, { "epoch": 3.355220421706842, "grad_norm": 0.07358868630576501, "learning_rate": 5.213152016675399e-05, "loss": 0.8455, "step": 191110 }, { "epoch": 3.355395986586843, "grad_norm": 0.048813698325806044, "learning_rate": 5.2121829113972775e-05, "loss": 0.8355, "step": 191120 }, { "epoch": 3.3555715514668445, "grad_norm": 0.05382755993771167, "learning_rate": 5.21121386804873e-05, "loss": 0.8495, "step": 191130 }, { "epoch": 3.355747116346846, "grad_norm": 0.05357854723340084, "learning_rate": 5.2102448866420416e-05, "loss": 0.843, "step": 191140 }, { "epoch": 3.3559226812268474, "grad_norm": 0.05592315065438146, "learning_rate": 5.2092759671894864e-05, "loss": 0.8421, "step": 191150 }, { "epoch": 3.356098246106849, "grad_norm": 0.06253019397358424, "learning_rate": 5.2083071097033455e-05, "loss": 0.8459, "step": 191160 }, { "epoch": 3.35627381098685, "grad_norm": 0.05696978542630176, "learning_rate": 5.207338314195891e-05, "loss": 0.849, "step": 191170 }, { "epoch": 3.3564493758668514, "grad_norm": 0.04718827988243918, "learning_rate": 5.206369580679391e-05, "loss": 0.8384, "step": 191180 }, { "epoch": 3.356624940746853, "grad_norm": 0.04972980005313652, "learning_rate": 5.2054009091661336e-05, "loss": 0.8408, "step": 191190 }, { "epoch": 3.3568005056268544, "grad_norm": 0.05579528610564679, "learning_rate": 5.20443229966838e-05, "loss": 0.8407, "step": 191200 }, { "epoch": 3.356976070506856, "grad_norm": 0.04945570656947227, "learning_rate": 5.2034637521984073e-05, "loss": 0.8406, "step": 191210 }, { "epoch": 3.3571516353868573, "grad_norm": 0.05295445827201562, "learning_rate": 5.2024952667684935e-05, "loss": 0.8349, "step": 191220 }, { "epoch": 3.357327200266859, "grad_norm": 0.06214827537156114, "learning_rate": 5.201526843390897e-05, "loss": 0.8446, "step": 191230 }, { "epoch": 3.35750276514686, "grad_norm": 0.05526814246049467, "learning_rate": 5.200558482077896e-05, "loss": 0.8435, "step": 191240 }, { "epoch": 3.3576783300268613, "grad_norm": 0.04435600299692391, "learning_rate": 5.199590182841756e-05, "loss": 0.8465, "step": 191250 }, { "epoch": 3.357853894906863, "grad_norm": 0.04802614076231742, "learning_rate": 5.198621945694753e-05, "loss": 0.8384, "step": 191260 }, { "epoch": 3.3580294597868643, "grad_norm": 0.05232071475621778, "learning_rate": 5.1976537706491446e-05, "loss": 0.8427, "step": 191270 }, { "epoch": 3.3582050246668658, "grad_norm": 0.055613264852500184, "learning_rate": 5.196685657717203e-05, "loss": 0.8391, "step": 191280 }, { "epoch": 3.358380589546867, "grad_norm": 0.0595309444155183, "learning_rate": 5.1957176069111965e-05, "loss": 0.8337, "step": 191290 }, { "epoch": 3.3585561544268683, "grad_norm": 0.04489667331094394, "learning_rate": 5.1947496182433854e-05, "loss": 0.8414, "step": 191300 }, { "epoch": 3.3587317193068698, "grad_norm": 0.05898817083571271, "learning_rate": 5.193781691726036e-05, "loss": 0.8385, "step": 191310 }, { "epoch": 3.3589072841868712, "grad_norm": 0.06836311669657767, "learning_rate": 5.1928138273714135e-05, "loss": 0.8366, "step": 191320 }, { "epoch": 3.3590828490668727, "grad_norm": 0.06762238956897854, "learning_rate": 5.1918460251917836e-05, "loss": 0.8335, "step": 191330 }, { "epoch": 3.359258413946874, "grad_norm": 0.06578271466290951, "learning_rate": 5.190878285199405e-05, "loss": 0.8428, "step": 191340 }, { "epoch": 3.3594339788268757, "grad_norm": 0.05593395034251765, "learning_rate": 5.1899106074065315e-05, "loss": 0.8352, "step": 191350 }, { "epoch": 3.3596095437068767, "grad_norm": 0.0443208222228264, "learning_rate": 5.1889429918254396e-05, "loss": 0.8433, "step": 191360 }, { "epoch": 3.359785108586878, "grad_norm": 0.05313140971938416, "learning_rate": 5.187975438468378e-05, "loss": 0.8357, "step": 191370 }, { "epoch": 3.3599606734668797, "grad_norm": 0.05235437181921441, "learning_rate": 5.187007947347608e-05, "loss": 0.8449, "step": 191380 }, { "epoch": 3.360136238346881, "grad_norm": 0.09068872084150574, "learning_rate": 5.1860405184753935e-05, "loss": 0.8336, "step": 191390 }, { "epoch": 3.3603118032268826, "grad_norm": 0.05573652387036431, "learning_rate": 5.1850731518639825e-05, "loss": 0.8368, "step": 191400 }, { "epoch": 3.360487368106884, "grad_norm": 0.07041833734607848, "learning_rate": 5.1841058475256376e-05, "loss": 0.8472, "step": 191410 }, { "epoch": 3.360662932986885, "grad_norm": 0.06945677529061446, "learning_rate": 5.183138605472613e-05, "loss": 0.8403, "step": 191420 }, { "epoch": 3.3608384978668866, "grad_norm": 0.05663234880852349, "learning_rate": 5.1821714257171695e-05, "loss": 0.8367, "step": 191430 }, { "epoch": 3.361014062746888, "grad_norm": 0.04279341620840209, "learning_rate": 5.181204308271552e-05, "loss": 0.847, "step": 191440 }, { "epoch": 3.3611896276268896, "grad_norm": 0.06071023712749484, "learning_rate": 5.1802372531480195e-05, "loss": 0.8351, "step": 191450 }, { "epoch": 3.361365192506891, "grad_norm": 0.05923957379804324, "learning_rate": 5.179270260358826e-05, "loss": 0.8358, "step": 191460 }, { "epoch": 3.3615407573868925, "grad_norm": 0.06364679682234625, "learning_rate": 5.1783033299162176e-05, "loss": 0.842, "step": 191470 }, { "epoch": 3.3617163222668935, "grad_norm": 0.05372783999142378, "learning_rate": 5.1773364618324513e-05, "loss": 0.8417, "step": 191480 }, { "epoch": 3.361891887146895, "grad_norm": 0.062211705138542774, "learning_rate": 5.176369656119774e-05, "loss": 0.8438, "step": 191490 }, { "epoch": 3.3620674520268965, "grad_norm": 0.045073437715726014, "learning_rate": 5.17540291279044e-05, "loss": 0.8412, "step": 191500 }, { "epoch": 3.362243016906898, "grad_norm": 0.0507509748817467, "learning_rate": 5.1744362318566965e-05, "loss": 0.8428, "step": 191510 }, { "epoch": 3.3624185817868995, "grad_norm": 0.06056414777656706, "learning_rate": 5.17346961333078e-05, "loss": 0.8403, "step": 191520 }, { "epoch": 3.362594146666901, "grad_norm": 0.06155625851065957, "learning_rate": 5.1725030572249576e-05, "loss": 0.8451, "step": 191530 }, { "epoch": 3.3627697115469024, "grad_norm": 0.07592780786338804, "learning_rate": 5.171536563551461e-05, "loss": 0.842, "step": 191540 }, { "epoch": 3.3629452764269034, "grad_norm": 0.06592555350249535, "learning_rate": 5.1705701323225406e-05, "loss": 0.8456, "step": 191550 }, { "epoch": 3.363120841306905, "grad_norm": 0.04346658818459692, "learning_rate": 5.1696037635504455e-05, "loss": 0.8405, "step": 191560 }, { "epoch": 3.3632964061869064, "grad_norm": 0.04999902101291993, "learning_rate": 5.168637457247412e-05, "loss": 0.8529, "step": 191570 }, { "epoch": 3.363471971066908, "grad_norm": 0.06162347658163204, "learning_rate": 5.1676712134256876e-05, "loss": 0.847, "step": 191580 }, { "epoch": 3.3636475359469094, "grad_norm": 0.058590562767727304, "learning_rate": 5.166705032097512e-05, "loss": 0.8365, "step": 191590 }, { "epoch": 3.3638231008269104, "grad_norm": 0.051831450495312376, "learning_rate": 5.165738913275134e-05, "loss": 0.8476, "step": 191600 }, { "epoch": 3.363998665706912, "grad_norm": 0.051505158997625373, "learning_rate": 5.164772856970786e-05, "loss": 0.8376, "step": 191610 }, { "epoch": 3.3641742305869133, "grad_norm": 0.05880654942371588, "learning_rate": 5.1638068631967117e-05, "loss": 0.8402, "step": 191620 }, { "epoch": 3.364349795466915, "grad_norm": 0.04860386560160922, "learning_rate": 5.162840931965155e-05, "loss": 0.8388, "step": 191630 }, { "epoch": 3.3645253603469163, "grad_norm": 0.056728674681503194, "learning_rate": 5.161875063288345e-05, "loss": 0.8383, "step": 191640 }, { "epoch": 3.3647009252269178, "grad_norm": 0.042502144034061336, "learning_rate": 5.160909257178525e-05, "loss": 0.8366, "step": 191650 }, { "epoch": 3.3648764901069192, "grad_norm": 0.06317656185424349, "learning_rate": 5.159943513647931e-05, "loss": 0.8432, "step": 191660 }, { "epoch": 3.3650520549869203, "grad_norm": 0.04851703091442338, "learning_rate": 5.158977832708803e-05, "loss": 0.8404, "step": 191670 }, { "epoch": 3.3652276198669218, "grad_norm": 0.04813583359949942, "learning_rate": 5.158012214373373e-05, "loss": 0.8453, "step": 191680 }, { "epoch": 3.3654031847469232, "grad_norm": 0.055897445659908375, "learning_rate": 5.157046658653868e-05, "loss": 0.8378, "step": 191690 }, { "epoch": 3.3655787496269247, "grad_norm": 0.05219186582634968, "learning_rate": 5.156081165562536e-05, "loss": 0.8336, "step": 191700 }, { "epoch": 3.365754314506926, "grad_norm": 0.0401720002365314, "learning_rate": 5.155115735111601e-05, "loss": 0.8386, "step": 191710 }, { "epoch": 3.3659298793869272, "grad_norm": 0.05946010610852469, "learning_rate": 5.154150367313295e-05, "loss": 0.8377, "step": 191720 }, { "epoch": 3.3661054442669287, "grad_norm": 0.053884823649920574, "learning_rate": 5.153185062179857e-05, "loss": 0.8445, "step": 191730 }, { "epoch": 3.36628100914693, "grad_norm": 0.08824128196160515, "learning_rate": 5.152219819723509e-05, "loss": 0.8432, "step": 191740 }, { "epoch": 3.3664565740269317, "grad_norm": 0.0495568756443934, "learning_rate": 5.151254639956483e-05, "loss": 0.8436, "step": 191750 }, { "epoch": 3.366632138906933, "grad_norm": 0.046577802705277005, "learning_rate": 5.1502895228910096e-05, "loss": 0.8458, "step": 191760 }, { "epoch": 3.3668077037869346, "grad_norm": 0.05326935994067747, "learning_rate": 5.149324468539321e-05, "loss": 0.8432, "step": 191770 }, { "epoch": 3.366983268666936, "grad_norm": 0.05916856711305766, "learning_rate": 5.148359476913635e-05, "loss": 0.8472, "step": 191780 }, { "epoch": 3.367158833546937, "grad_norm": 0.06543873456884586, "learning_rate": 5.1473945480261845e-05, "loss": 0.8487, "step": 191790 }, { "epoch": 3.3673343984269386, "grad_norm": 0.06275363568312523, "learning_rate": 5.1464296818891975e-05, "loss": 0.8413, "step": 191800 }, { "epoch": 3.36750996330694, "grad_norm": 0.04622519168504258, "learning_rate": 5.1454648785148915e-05, "loss": 0.8438, "step": 191810 }, { "epoch": 3.3676855281869416, "grad_norm": 0.05207079566811095, "learning_rate": 5.1445001379154955e-05, "loss": 0.8344, "step": 191820 }, { "epoch": 3.367861093066943, "grad_norm": 0.05538581591291978, "learning_rate": 5.1435354601032326e-05, "loss": 0.8417, "step": 191830 }, { "epoch": 3.368036657946944, "grad_norm": 0.055247490238291216, "learning_rate": 5.14257084509033e-05, "loss": 0.8325, "step": 191840 }, { "epoch": 3.3682122228269455, "grad_norm": 0.05845034804655482, "learning_rate": 5.141606292889003e-05, "loss": 0.8451, "step": 191850 }, { "epoch": 3.368387787706947, "grad_norm": 0.041857142799372964, "learning_rate": 5.140641803511469e-05, "loss": 0.8329, "step": 191860 }, { "epoch": 3.3685633525869485, "grad_norm": 0.047457777459788145, "learning_rate": 5.13967737696996e-05, "loss": 0.8379, "step": 191870 }, { "epoch": 3.36873891746695, "grad_norm": 0.05794785666784063, "learning_rate": 5.138713013276686e-05, "loss": 0.8376, "step": 191880 }, { "epoch": 3.3689144823469515, "grad_norm": 0.054210441567374916, "learning_rate": 5.1377487124438686e-05, "loss": 0.8387, "step": 191890 }, { "epoch": 3.369090047226953, "grad_norm": 0.04928852933486844, "learning_rate": 5.136784474483729e-05, "loss": 0.8437, "step": 191900 }, { "epoch": 3.369265612106954, "grad_norm": 0.05175914735161534, "learning_rate": 5.135820299408479e-05, "loss": 0.8322, "step": 191910 }, { "epoch": 3.3694411769869554, "grad_norm": 0.06813651090165655, "learning_rate": 5.134856187230337e-05, "loss": 0.8435, "step": 191920 }, { "epoch": 3.369616741866957, "grad_norm": 0.05417962259801, "learning_rate": 5.133892137961518e-05, "loss": 0.837, "step": 191930 }, { "epoch": 3.3697923067469584, "grad_norm": 0.0668328623175882, "learning_rate": 5.132928151614241e-05, "loss": 0.8435, "step": 191940 }, { "epoch": 3.36996787162696, "grad_norm": 0.06191291765279457, "learning_rate": 5.131964228200714e-05, "loss": 0.8379, "step": 191950 }, { "epoch": 3.370143436506961, "grad_norm": 0.05582153942910568, "learning_rate": 5.13100036773315e-05, "loss": 0.84, "step": 191960 }, { "epoch": 3.3703190013869624, "grad_norm": 0.046320758290574304, "learning_rate": 5.13003657022377e-05, "loss": 0.8426, "step": 191970 }, { "epoch": 3.370494566266964, "grad_norm": 0.051675984679389726, "learning_rate": 5.129072835684774e-05, "loss": 0.8338, "step": 191980 }, { "epoch": 3.3706701311469653, "grad_norm": 0.047745619098106894, "learning_rate": 5.1281091641283774e-05, "loss": 0.8461, "step": 191990 }, { "epoch": 3.370845696026967, "grad_norm": 0.05766773648025234, "learning_rate": 5.1271455555667914e-05, "loss": 0.8349, "step": 192000 }, { "epoch": 3.3710212609069683, "grad_norm": 0.048402917377727464, "learning_rate": 5.126182010012227e-05, "loss": 0.8424, "step": 192010 }, { "epoch": 3.3711968257869698, "grad_norm": 0.058862230635301466, "learning_rate": 5.1252185274768905e-05, "loss": 0.8381, "step": 192020 }, { "epoch": 3.371372390666971, "grad_norm": 0.04915711962912607, "learning_rate": 5.124255107972982e-05, "loss": 0.8442, "step": 192030 }, { "epoch": 3.3715479555469723, "grad_norm": 0.04946065812090965, "learning_rate": 5.123291751512721e-05, "loss": 0.8399, "step": 192040 }, { "epoch": 3.3717235204269738, "grad_norm": 0.05557752797608386, "learning_rate": 5.1223284581083044e-05, "loss": 0.8391, "step": 192050 }, { "epoch": 3.3718990853069752, "grad_norm": 0.05654937079336426, "learning_rate": 5.121365227771939e-05, "loss": 0.8381, "step": 192060 }, { "epoch": 3.3720746501869767, "grad_norm": 0.06430008754117306, "learning_rate": 5.1204020605158353e-05, "loss": 0.8454, "step": 192070 }, { "epoch": 3.3722502150669778, "grad_norm": 0.06481225082879648, "learning_rate": 5.119438956352188e-05, "loss": 0.8439, "step": 192080 }, { "epoch": 3.3724257799469792, "grad_norm": 0.05257766167308451, "learning_rate": 5.118475915293203e-05, "loss": 0.8365, "step": 192090 }, { "epoch": 3.3726013448269807, "grad_norm": 0.05207966497299352, "learning_rate": 5.117512937351083e-05, "loss": 0.8369, "step": 192100 }, { "epoch": 3.372776909706982, "grad_norm": 0.08291615981294996, "learning_rate": 5.116550022538034e-05, "loss": 0.8401, "step": 192110 }, { "epoch": 3.3729524745869837, "grad_norm": 0.04743404765986916, "learning_rate": 5.115587170866245e-05, "loss": 0.8439, "step": 192120 }, { "epoch": 3.373128039466985, "grad_norm": 0.051144077270566725, "learning_rate": 5.1146243823479244e-05, "loss": 0.8403, "step": 192130 }, { "epoch": 3.3733036043469866, "grad_norm": 0.0481452031033534, "learning_rate": 5.11366165699527e-05, "loss": 0.8433, "step": 192140 }, { "epoch": 3.3734791692269877, "grad_norm": 0.05962443595907689, "learning_rate": 5.112698994820475e-05, "loss": 0.8406, "step": 192150 }, { "epoch": 3.373654734106989, "grad_norm": 0.04960791706636908, "learning_rate": 5.111736395835741e-05, "loss": 0.8408, "step": 192160 }, { "epoch": 3.3738302989869906, "grad_norm": 0.049815556073349296, "learning_rate": 5.110773860053262e-05, "loss": 0.8443, "step": 192170 }, { "epoch": 3.374005863866992, "grad_norm": 0.053565484137403634, "learning_rate": 5.109811387485237e-05, "loss": 0.8393, "step": 192180 }, { "epoch": 3.3741814287469936, "grad_norm": 0.09300784162746954, "learning_rate": 5.108848978143859e-05, "loss": 0.8419, "step": 192190 }, { "epoch": 3.374356993626995, "grad_norm": 0.05335327469545468, "learning_rate": 5.107886632041312e-05, "loss": 0.8459, "step": 192200 }, { "epoch": 3.374532558506996, "grad_norm": 0.06681633703199054, "learning_rate": 5.106924349189808e-05, "loss": 0.8324, "step": 192210 }, { "epoch": 3.3747081233869975, "grad_norm": 0.0511839618763199, "learning_rate": 5.105962129601524e-05, "loss": 0.8506, "step": 192220 }, { "epoch": 3.374883688266999, "grad_norm": 0.05303850861085858, "learning_rate": 5.1049999732886575e-05, "loss": 0.8438, "step": 192230 }, { "epoch": 3.3750592531470005, "grad_norm": 0.05021874125237089, "learning_rate": 5.1040378802634035e-05, "loss": 0.8499, "step": 192240 }, { "epoch": 3.375234818027002, "grad_norm": 0.053724173014155135, "learning_rate": 5.1030758505379414e-05, "loss": 0.8432, "step": 192250 }, { "epoch": 3.3754103829070035, "grad_norm": 0.06012699836340906, "learning_rate": 5.1021138841244665e-05, "loss": 0.8449, "step": 192260 }, { "epoch": 3.3755859477870045, "grad_norm": 0.053053366828231294, "learning_rate": 5.1011519810351674e-05, "loss": 0.8444, "step": 192270 }, { "epoch": 3.375761512667006, "grad_norm": 0.052185705370771295, "learning_rate": 5.100190141282234e-05, "loss": 0.8462, "step": 192280 }, { "epoch": 3.3759370775470074, "grad_norm": 0.04927080323607024, "learning_rate": 5.099228364877845e-05, "loss": 0.8462, "step": 192290 }, { "epoch": 3.376112642427009, "grad_norm": 0.0429754736279009, "learning_rate": 5.0982666518341916e-05, "loss": 0.8414, "step": 192300 }, { "epoch": 3.3762882073070104, "grad_norm": 0.052197676697870565, "learning_rate": 5.097305002163463e-05, "loss": 0.8389, "step": 192310 }, { "epoch": 3.376463772187012, "grad_norm": 0.060674080656408606, "learning_rate": 5.0963434158778346e-05, "loss": 0.8355, "step": 192320 }, { "epoch": 3.3766393370670134, "grad_norm": 0.05302878871277978, "learning_rate": 5.0953818929894924e-05, "loss": 0.8336, "step": 192330 }, { "epoch": 3.3768149019470144, "grad_norm": 0.046573320090396664, "learning_rate": 5.094420433510623e-05, "loss": 0.8315, "step": 192340 }, { "epoch": 3.376990466827016, "grad_norm": 0.055666950866993935, "learning_rate": 5.093459037453409e-05, "loss": 0.8437, "step": 192350 }, { "epoch": 3.3771660317070173, "grad_norm": 0.05133766462278727, "learning_rate": 5.092497704830028e-05, "loss": 0.847, "step": 192360 }, { "epoch": 3.377341596587019, "grad_norm": 0.05298455408529404, "learning_rate": 5.091536435652653e-05, "loss": 0.8424, "step": 192370 }, { "epoch": 3.3775171614670203, "grad_norm": 0.05438454959913241, "learning_rate": 5.0905752299334794e-05, "loss": 0.8339, "step": 192380 }, { "epoch": 3.3776927263470213, "grad_norm": 0.06975310770957757, "learning_rate": 5.089614087684674e-05, "loss": 0.8373, "step": 192390 }, { "epoch": 3.377868291227023, "grad_norm": 0.06184992354326768, "learning_rate": 5.0886530089184184e-05, "loss": 0.835, "step": 192400 }, { "epoch": 3.3780438561070243, "grad_norm": 0.05906375575376082, "learning_rate": 5.0876919936468926e-05, "loss": 0.8409, "step": 192410 }, { "epoch": 3.3782194209870258, "grad_norm": 0.050418174274934675, "learning_rate": 5.086731041882268e-05, "loss": 0.8488, "step": 192420 }, { "epoch": 3.3783949858670272, "grad_norm": 0.04720900293329408, "learning_rate": 5.08577015363672e-05, "loss": 0.8409, "step": 192430 }, { "epoch": 3.3785705507470287, "grad_norm": 0.052797997276673496, "learning_rate": 5.084809328922425e-05, "loss": 0.8416, "step": 192440 }, { "epoch": 3.37874611562703, "grad_norm": 0.061037656332097984, "learning_rate": 5.083848567751561e-05, "loss": 0.8304, "step": 192450 }, { "epoch": 3.3789216805070312, "grad_norm": 0.05542562532067787, "learning_rate": 5.082887870136295e-05, "loss": 0.8261, "step": 192460 }, { "epoch": 3.3790972453870327, "grad_norm": 0.04199716630062572, "learning_rate": 5.0819272360887996e-05, "loss": 0.8312, "step": 192470 }, { "epoch": 3.379272810267034, "grad_norm": 0.04894132434262027, "learning_rate": 5.080966665621252e-05, "loss": 0.8483, "step": 192480 }, { "epoch": 3.3794483751470357, "grad_norm": 0.051236824239141, "learning_rate": 5.080006158745815e-05, "loss": 0.839, "step": 192490 }, { "epoch": 3.379623940027037, "grad_norm": 0.051353890651156524, "learning_rate": 5.079045715474661e-05, "loss": 0.841, "step": 192500 }, { "epoch": 3.379799504907038, "grad_norm": 0.056389490885585106, "learning_rate": 5.0780853358199607e-05, "loss": 0.8424, "step": 192510 }, { "epoch": 3.3799750697870397, "grad_norm": 0.04414363007163887, "learning_rate": 5.0771250197938856e-05, "loss": 0.8419, "step": 192520 }, { "epoch": 3.380150634667041, "grad_norm": 0.05587936206015871, "learning_rate": 5.076164767408599e-05, "loss": 0.8433, "step": 192530 }, { "epoch": 3.3803261995470426, "grad_norm": 0.052601557574251244, "learning_rate": 5.0752045786762604e-05, "loss": 0.8453, "step": 192540 }, { "epoch": 3.380501764427044, "grad_norm": 0.07488684185343178, "learning_rate": 5.0742444536090504e-05, "loss": 0.8354, "step": 192550 }, { "epoch": 3.3806773293070456, "grad_norm": 0.06051354012192152, "learning_rate": 5.073284392219121e-05, "loss": 0.8435, "step": 192560 }, { "epoch": 3.380852894187047, "grad_norm": 0.05172873410150117, "learning_rate": 5.072324394518643e-05, "loss": 0.8468, "step": 192570 }, { "epoch": 3.381028459067048, "grad_norm": 0.07131127116625621, "learning_rate": 5.071364460519783e-05, "loss": 0.8479, "step": 192580 }, { "epoch": 3.3812040239470496, "grad_norm": 0.05628512176959717, "learning_rate": 5.070404590234694e-05, "loss": 0.8434, "step": 192590 }, { "epoch": 3.381379588827051, "grad_norm": 0.060441255522993766, "learning_rate": 5.069444783675543e-05, "loss": 0.8309, "step": 192600 }, { "epoch": 3.3815551537070525, "grad_norm": 0.05193874013227099, "learning_rate": 5.068485040854491e-05, "loss": 0.8384, "step": 192610 }, { "epoch": 3.381730718587054, "grad_norm": 0.09324846530437261, "learning_rate": 5.0675253617837017e-05, "loss": 0.8388, "step": 192620 }, { "epoch": 3.381906283467055, "grad_norm": 0.05803163247360217, "learning_rate": 5.066565746475326e-05, "loss": 0.8323, "step": 192630 }, { "epoch": 3.3820818483470565, "grad_norm": 0.06954442741106749, "learning_rate": 5.065606194941527e-05, "loss": 0.8367, "step": 192640 }, { "epoch": 3.382257413227058, "grad_norm": 0.05062620043936373, "learning_rate": 5.064646707194467e-05, "loss": 0.8433, "step": 192650 }, { "epoch": 3.3824329781070595, "grad_norm": 0.046110053183824655, "learning_rate": 5.0636872832462944e-05, "loss": 0.8381, "step": 192660 }, { "epoch": 3.382608542987061, "grad_norm": 0.09950168211040983, "learning_rate": 5.0627279231091694e-05, "loss": 0.8463, "step": 192670 }, { "epoch": 3.3827841078670624, "grad_norm": 0.07997822009373241, "learning_rate": 5.061768626795246e-05, "loss": 0.8448, "step": 192680 }, { "epoch": 3.382959672747064, "grad_norm": 0.057160974489997476, "learning_rate": 5.060809394316683e-05, "loss": 0.8354, "step": 192690 }, { "epoch": 3.383135237627065, "grad_norm": 0.05517986946028042, "learning_rate": 5.0598502256856316e-05, "loss": 0.8414, "step": 192700 }, { "epoch": 3.3833108025070664, "grad_norm": 0.050684166985397586, "learning_rate": 5.0588911209142365e-05, "loss": 0.8386, "step": 192710 }, { "epoch": 3.383486367387068, "grad_norm": 0.05133565547725073, "learning_rate": 5.0579320800146645e-05, "loss": 0.841, "step": 192720 }, { "epoch": 3.3836619322670693, "grad_norm": 0.0458838601283481, "learning_rate": 5.056973102999055e-05, "loss": 0.8411, "step": 192730 }, { "epoch": 3.383837497147071, "grad_norm": 0.06057068049103058, "learning_rate": 5.0560141898795635e-05, "loss": 0.8408, "step": 192740 }, { "epoch": 3.384013062027072, "grad_norm": 0.04869097357163453, "learning_rate": 5.055055340668342e-05, "loss": 0.8393, "step": 192750 }, { "epoch": 3.3841886269070733, "grad_norm": 0.048984920320958705, "learning_rate": 5.0540965553775335e-05, "loss": 0.8393, "step": 192760 }, { "epoch": 3.384364191787075, "grad_norm": 0.05095472669972273, "learning_rate": 5.053137834019288e-05, "loss": 0.8395, "step": 192770 }, { "epoch": 3.3845397566670763, "grad_norm": 0.05039863603623326, "learning_rate": 5.052179176605752e-05, "loss": 0.8413, "step": 192780 }, { "epoch": 3.3847153215470778, "grad_norm": 0.05481204949869155, "learning_rate": 5.051220583149079e-05, "loss": 0.8406, "step": 192790 }, { "epoch": 3.3848908864270792, "grad_norm": 0.051093534914525264, "learning_rate": 5.050262053661404e-05, "loss": 0.8475, "step": 192800 }, { "epoch": 3.3850664513070807, "grad_norm": 0.050257993975720436, "learning_rate": 5.049303588154877e-05, "loss": 0.8319, "step": 192810 }, { "epoch": 3.3852420161870818, "grad_norm": 0.04887878194982596, "learning_rate": 5.0483451866416454e-05, "loss": 0.8403, "step": 192820 }, { "epoch": 3.3854175810670832, "grad_norm": 0.053492554144958945, "learning_rate": 5.047386849133844e-05, "loss": 0.8383, "step": 192830 }, { "epoch": 3.3855931459470847, "grad_norm": 0.05379453378645171, "learning_rate": 5.0464285756436204e-05, "loss": 0.8454, "step": 192840 }, { "epoch": 3.385768710827086, "grad_norm": 0.057670727773465456, "learning_rate": 5.0454703661831136e-05, "loss": 0.8457, "step": 192850 }, { "epoch": 3.3859442757070877, "grad_norm": 0.04754326516003908, "learning_rate": 5.0445122207644716e-05, "loss": 0.8379, "step": 192860 }, { "epoch": 3.386119840587089, "grad_norm": 0.04865214989489531, "learning_rate": 5.043554139399828e-05, "loss": 0.8408, "step": 192870 }, { "epoch": 3.38629540546709, "grad_norm": 0.06493854903804064, "learning_rate": 5.042596122101315e-05, "loss": 0.846, "step": 192880 }, { "epoch": 3.3864709703470917, "grad_norm": 0.04688253284227578, "learning_rate": 5.0416381688810854e-05, "loss": 0.8371, "step": 192890 }, { "epoch": 3.386646535227093, "grad_norm": 0.04722733144409066, "learning_rate": 5.040680279751266e-05, "loss": 0.8379, "step": 192900 }, { "epoch": 3.3868221001070946, "grad_norm": 0.05686962056159399, "learning_rate": 5.0397224547239986e-05, "loss": 0.8389, "step": 192910 }, { "epoch": 3.386997664987096, "grad_norm": 0.05045928263614919, "learning_rate": 5.0387646938114194e-05, "loss": 0.8529, "step": 192920 }, { "epoch": 3.3871732298670976, "grad_norm": 0.04974958182299057, "learning_rate": 5.037806997025659e-05, "loss": 0.8449, "step": 192930 }, { "epoch": 3.3873487947470986, "grad_norm": 0.05415654928207128, "learning_rate": 5.036849364378853e-05, "loss": 0.8426, "step": 192940 }, { "epoch": 3.3875243596271, "grad_norm": 0.051150427843309525, "learning_rate": 5.0358917958831365e-05, "loss": 0.841, "step": 192950 }, { "epoch": 3.3876999245071016, "grad_norm": 0.04900491601018708, "learning_rate": 5.034934291550645e-05, "loss": 0.8471, "step": 192960 }, { "epoch": 3.387875489387103, "grad_norm": 0.05413413296675288, "learning_rate": 5.033976851393504e-05, "loss": 0.8431, "step": 192970 }, { "epoch": 3.3880510542671045, "grad_norm": 0.05165896088415281, "learning_rate": 5.0330194754238446e-05, "loss": 0.8333, "step": 192980 }, { "epoch": 3.388226619147106, "grad_norm": 0.04909065607334043, "learning_rate": 5.0320621636538055e-05, "loss": 0.8398, "step": 192990 }, { "epoch": 3.3884021840271075, "grad_norm": 0.04487472017710912, "learning_rate": 5.031104916095506e-05, "loss": 0.8448, "step": 193000 }, { "epoch": 3.3885777489071085, "grad_norm": 0.06624433325119518, "learning_rate": 5.030147732761079e-05, "loss": 0.8428, "step": 193010 }, { "epoch": 3.38875331378711, "grad_norm": 0.06497497208607621, "learning_rate": 5.029190613662651e-05, "loss": 0.848, "step": 193020 }, { "epoch": 3.3889288786671115, "grad_norm": 0.046561621059193675, "learning_rate": 5.0282335588123544e-05, "loss": 0.8376, "step": 193030 }, { "epoch": 3.389104443547113, "grad_norm": 0.04897487281666881, "learning_rate": 5.02727656822231e-05, "loss": 0.8419, "step": 193040 }, { "epoch": 3.3892800084271144, "grad_norm": 0.05126618736063735, "learning_rate": 5.0263196419046355e-05, "loss": 0.8416, "step": 193050 }, { "epoch": 3.3894555733071154, "grad_norm": 0.05496008440037184, "learning_rate": 5.025362779871472e-05, "loss": 0.8462, "step": 193060 }, { "epoch": 3.389631138187117, "grad_norm": 0.06729269026943481, "learning_rate": 5.024405982134931e-05, "loss": 0.8438, "step": 193070 }, { "epoch": 3.3898067030671184, "grad_norm": 0.04676171280006008, "learning_rate": 5.023449248707138e-05, "loss": 0.8327, "step": 193080 }, { "epoch": 3.38998226794712, "grad_norm": 0.06550582417592682, "learning_rate": 5.0224925796002196e-05, "loss": 0.8411, "step": 193090 }, { "epoch": 3.3901578328271214, "grad_norm": 0.04407418414559984, "learning_rate": 5.021535974826291e-05, "loss": 0.8426, "step": 193100 }, { "epoch": 3.390333397707123, "grad_norm": 0.03941777716548067, "learning_rate": 5.020579434397473e-05, "loss": 0.8405, "step": 193110 }, { "epoch": 3.3905089625871243, "grad_norm": 0.04150227618294421, "learning_rate": 5.019622958325889e-05, "loss": 0.8493, "step": 193120 }, { "epoch": 3.3906845274671253, "grad_norm": 0.0564939902843735, "learning_rate": 5.018666546623658e-05, "loss": 0.8382, "step": 193130 }, { "epoch": 3.390860092347127, "grad_norm": 0.04713510886314179, "learning_rate": 5.017710199302893e-05, "loss": 0.8313, "step": 193140 }, { "epoch": 3.3910356572271283, "grad_norm": 0.07047094589439587, "learning_rate": 5.016753916375714e-05, "loss": 0.8457, "step": 193150 }, { "epoch": 3.3912112221071298, "grad_norm": 0.06388024781387096, "learning_rate": 5.01579769785424e-05, "loss": 0.836, "step": 193160 }, { "epoch": 3.3913867869871313, "grad_norm": 0.08382497600273339, "learning_rate": 5.014841543750579e-05, "loss": 0.8417, "step": 193170 }, { "epoch": 3.3915623518671323, "grad_norm": 0.040511059160894584, "learning_rate": 5.013885454076852e-05, "loss": 0.8459, "step": 193180 }, { "epoch": 3.3917379167471338, "grad_norm": 0.10287742179711523, "learning_rate": 5.01292942884517e-05, "loss": 0.8445, "step": 193190 }, { "epoch": 3.3919134816271352, "grad_norm": 0.05507742230680841, "learning_rate": 5.011973468067651e-05, "loss": 0.8412, "step": 193200 }, { "epoch": 3.3920890465071367, "grad_norm": 0.05785572447827023, "learning_rate": 5.0110175717564034e-05, "loss": 0.8458, "step": 193210 }, { "epoch": 3.392264611387138, "grad_norm": 0.06267531175026334, "learning_rate": 5.010061739923532e-05, "loss": 0.8442, "step": 193220 }, { "epoch": 3.3924401762671397, "grad_norm": 0.06980805878280484, "learning_rate": 5.0091059725811596e-05, "loss": 0.832, "step": 193230 }, { "epoch": 3.392615741147141, "grad_norm": 0.046756343242839664, "learning_rate": 5.0081502697413864e-05, "loss": 0.838, "step": 193240 }, { "epoch": 3.392791306027142, "grad_norm": 0.04921355428546376, "learning_rate": 5.0071946314163256e-05, "loss": 0.8353, "step": 193250 }, { "epoch": 3.3929668709071437, "grad_norm": 0.04751022705006046, "learning_rate": 5.0062390576180884e-05, "loss": 0.8436, "step": 193260 }, { "epoch": 3.393142435787145, "grad_norm": 0.057675346977721156, "learning_rate": 5.005283548358776e-05, "loss": 0.8413, "step": 193270 }, { "epoch": 3.3933180006671466, "grad_norm": 0.05299714034484361, "learning_rate": 5.004328103650496e-05, "loss": 0.8329, "step": 193280 }, { "epoch": 3.393493565547148, "grad_norm": 0.04487185110151435, "learning_rate": 5.0033727235053576e-05, "loss": 0.8448, "step": 193290 }, { "epoch": 3.393669130427149, "grad_norm": 0.05314376300515251, "learning_rate": 5.0024174079354644e-05, "loss": 0.8317, "step": 193300 }, { "epoch": 3.3938446953071506, "grad_norm": 0.06933736514059612, "learning_rate": 5.0014621569529177e-05, "loss": 0.8445, "step": 193310 }, { "epoch": 3.394020260187152, "grad_norm": 0.048409334125121585, "learning_rate": 5.0005069705698224e-05, "loss": 0.8372, "step": 193320 }, { "epoch": 3.3941958250671536, "grad_norm": 0.05103532184243316, "learning_rate": 4.999551848798285e-05, "loss": 0.8405, "step": 193330 }, { "epoch": 3.394371389947155, "grad_norm": 0.051078584343704166, "learning_rate": 4.998596791650399e-05, "loss": 0.855, "step": 193340 }, { "epoch": 3.3945469548271565, "grad_norm": 0.06372212766403614, "learning_rate": 4.9976417991382694e-05, "loss": 0.8355, "step": 193350 }, { "epoch": 3.394722519707158, "grad_norm": 0.051553052919880736, "learning_rate": 4.996686871273996e-05, "loss": 0.8463, "step": 193360 }, { "epoch": 3.394898084587159, "grad_norm": 0.04516745305503242, "learning_rate": 4.995732008069682e-05, "loss": 0.8373, "step": 193370 }, { "epoch": 3.3950736494671605, "grad_norm": 0.04659411419829517, "learning_rate": 4.9947772095374216e-05, "loss": 0.838, "step": 193380 }, { "epoch": 3.395249214347162, "grad_norm": 0.0638625881993247, "learning_rate": 4.993822475689305e-05, "loss": 0.849, "step": 193390 }, { "epoch": 3.3954247792271635, "grad_norm": 0.06394299372980679, "learning_rate": 4.9928678065374425e-05, "loss": 0.8332, "step": 193400 }, { "epoch": 3.395600344107165, "grad_norm": 0.05964343701048201, "learning_rate": 4.9919132020939205e-05, "loss": 0.8388, "step": 193410 }, { "epoch": 3.395775908987166, "grad_norm": 0.046728327701259904, "learning_rate": 4.9909586623708376e-05, "loss": 0.8405, "step": 193420 }, { "epoch": 3.3959514738671674, "grad_norm": 0.07728742144669959, "learning_rate": 4.9900041873802914e-05, "loss": 0.8273, "step": 193430 }, { "epoch": 3.396127038747169, "grad_norm": 0.0643739978007083, "learning_rate": 4.989049777134368e-05, "loss": 0.848, "step": 193440 }, { "epoch": 3.3963026036271704, "grad_norm": 0.048566313225714926, "learning_rate": 4.988095431645163e-05, "loss": 0.8443, "step": 193450 }, { "epoch": 3.396478168507172, "grad_norm": 0.061784130868471114, "learning_rate": 4.9871411509247695e-05, "loss": 0.8432, "step": 193460 }, { "epoch": 3.3966537333871734, "grad_norm": 0.04840480083937878, "learning_rate": 4.9861869349852796e-05, "loss": 0.8391, "step": 193470 }, { "epoch": 3.396829298267175, "grad_norm": 0.047603952870023065, "learning_rate": 4.9852327838387795e-05, "loss": 0.8311, "step": 193480 }, { "epoch": 3.397004863147176, "grad_norm": 0.05675104013585827, "learning_rate": 4.984278697497358e-05, "loss": 0.8341, "step": 193490 }, { "epoch": 3.3971804280271773, "grad_norm": 0.06166414260229416, "learning_rate": 4.983324675973111e-05, "loss": 0.836, "step": 193500 }, { "epoch": 3.397355992907179, "grad_norm": 0.054571701709906065, "learning_rate": 4.982370719278117e-05, "loss": 0.8416, "step": 193510 }, { "epoch": 3.3975315577871803, "grad_norm": 0.06599583646610371, "learning_rate": 4.9814168274244664e-05, "loss": 0.842, "step": 193520 }, { "epoch": 3.397707122667182, "grad_norm": 0.06383850112702254, "learning_rate": 4.980463000424246e-05, "loss": 0.8353, "step": 193530 }, { "epoch": 3.397882687547183, "grad_norm": 0.07836354551246208, "learning_rate": 4.979509238289545e-05, "loss": 0.8395, "step": 193540 }, { "epoch": 3.3980582524271843, "grad_norm": 0.06795671573605544, "learning_rate": 4.978555541032442e-05, "loss": 0.8428, "step": 193550 }, { "epoch": 3.3982338173071858, "grad_norm": 0.06537237144270112, "learning_rate": 4.977601908665014e-05, "loss": 0.8456, "step": 193560 }, { "epoch": 3.3984093821871872, "grad_norm": 0.05321635580663171, "learning_rate": 4.9766483411993606e-05, "loss": 0.842, "step": 193570 }, { "epoch": 3.3985849470671887, "grad_norm": 0.04716189841531798, "learning_rate": 4.9756948386475495e-05, "loss": 0.8388, "step": 193580 }, { "epoch": 3.39876051194719, "grad_norm": 0.05046675966325508, "learning_rate": 4.974741401021667e-05, "loss": 0.8429, "step": 193590 }, { "epoch": 3.3989360768271917, "grad_norm": 0.049520284762534304, "learning_rate": 4.973788028333797e-05, "loss": 0.8379, "step": 193600 }, { "epoch": 3.3991116417071927, "grad_norm": 0.05093249829755091, "learning_rate": 4.9728347205960125e-05, "loss": 0.8341, "step": 193610 }, { "epoch": 3.399287206587194, "grad_norm": 0.06308516580197185, "learning_rate": 4.971881477820393e-05, "loss": 0.8463, "step": 193620 }, { "epoch": 3.3994627714671957, "grad_norm": 0.05421758246723273, "learning_rate": 4.97092830001902e-05, "loss": 0.847, "step": 193630 }, { "epoch": 3.399638336347197, "grad_norm": 0.058024554111457724, "learning_rate": 4.96997518720397e-05, "loss": 0.8445, "step": 193640 }, { "epoch": 3.3998139012271986, "grad_norm": 0.05254125118496924, "learning_rate": 4.969022139387315e-05, "loss": 0.8451, "step": 193650 }, { "epoch": 3.3999894661072, "grad_norm": 0.07752010937699252, "learning_rate": 4.968069156581133e-05, "loss": 0.8455, "step": 193660 }, { "epoch": 3.400165030987201, "grad_norm": 0.06324442290406092, "learning_rate": 4.967116238797502e-05, "loss": 0.841, "step": 193670 }, { "epoch": 3.4003405958672026, "grad_norm": 0.05915850662511783, "learning_rate": 4.966163386048488e-05, "loss": 0.8397, "step": 193680 }, { "epoch": 3.400516160747204, "grad_norm": 0.05281511061946961, "learning_rate": 4.9652105983461677e-05, "loss": 0.8476, "step": 193690 }, { "epoch": 3.4006917256272056, "grad_norm": 0.06695043443062473, "learning_rate": 4.964257875702613e-05, "loss": 0.8361, "step": 193700 }, { "epoch": 3.400867290507207, "grad_norm": 0.060623540093087104, "learning_rate": 4.963305218129899e-05, "loss": 0.8392, "step": 193710 }, { "epoch": 3.4010428553872085, "grad_norm": 0.046959093603905114, "learning_rate": 4.962352625640093e-05, "loss": 0.8419, "step": 193720 }, { "epoch": 3.4012184202672096, "grad_norm": 0.05093838128782122, "learning_rate": 4.9614000982452566e-05, "loss": 0.8346, "step": 193730 }, { "epoch": 3.401393985147211, "grad_norm": 0.05162584402537782, "learning_rate": 4.960447635957472e-05, "loss": 0.8372, "step": 193740 }, { "epoch": 3.4015695500272125, "grad_norm": 0.055666742496156794, "learning_rate": 4.9594952387887975e-05, "loss": 0.8346, "step": 193750 }, { "epoch": 3.401745114907214, "grad_norm": 0.07083411742807359, "learning_rate": 4.958542906751303e-05, "loss": 0.8439, "step": 193760 }, { "epoch": 3.4019206797872155, "grad_norm": 0.05570123541333144, "learning_rate": 4.957590639857061e-05, "loss": 0.838, "step": 193770 }, { "epoch": 3.402096244667217, "grad_norm": 0.08089845271480547, "learning_rate": 4.9566384381181255e-05, "loss": 0.8372, "step": 193780 }, { "epoch": 3.4022718095472184, "grad_norm": 0.05031620555806912, "learning_rate": 4.955686301546567e-05, "loss": 0.8388, "step": 193790 }, { "epoch": 3.4024473744272195, "grad_norm": 0.04478206225198084, "learning_rate": 4.954734230154449e-05, "loss": 0.8422, "step": 193800 }, { "epoch": 3.402622939307221, "grad_norm": 0.07132328182518904, "learning_rate": 4.953782223953839e-05, "loss": 0.8417, "step": 193810 }, { "epoch": 3.4027985041872224, "grad_norm": 0.05867142893196741, "learning_rate": 4.95283028295679e-05, "loss": 0.8398, "step": 193820 }, { "epoch": 3.402974069067224, "grad_norm": 0.048920472493168735, "learning_rate": 4.951878407175368e-05, "loss": 0.8371, "step": 193830 }, { "epoch": 3.4031496339472254, "grad_norm": 0.05709892168331806, "learning_rate": 4.9509265966216366e-05, "loss": 0.8412, "step": 193840 }, { "epoch": 3.4033251988272264, "grad_norm": 0.044134644089008704, "learning_rate": 4.949974851307649e-05, "loss": 0.8408, "step": 193850 }, { "epoch": 3.403500763707228, "grad_norm": 0.05482120912349114, "learning_rate": 4.9490231712454666e-05, "loss": 0.8357, "step": 193860 }, { "epoch": 3.4036763285872293, "grad_norm": 0.0654500695575328, "learning_rate": 4.948071556447147e-05, "loss": 0.8446, "step": 193870 }, { "epoch": 3.403851893467231, "grad_norm": 0.05667092780579422, "learning_rate": 4.9471200069247534e-05, "loss": 0.8431, "step": 193880 }, { "epoch": 3.4040274583472323, "grad_norm": 0.05390243058512263, "learning_rate": 4.946168522690336e-05, "loss": 0.8453, "step": 193890 }, { "epoch": 3.404203023227234, "grad_norm": 0.06654182833834492, "learning_rate": 4.945217103755943e-05, "loss": 0.8459, "step": 193900 }, { "epoch": 3.4043785881072353, "grad_norm": 0.07514669460048515, "learning_rate": 4.9442657501336456e-05, "loss": 0.836, "step": 193910 }, { "epoch": 3.4045541529872363, "grad_norm": 0.04582838643852945, "learning_rate": 4.943314461835486e-05, "loss": 0.847, "step": 193920 }, { "epoch": 3.4047297178672378, "grad_norm": 0.06434905321097097, "learning_rate": 4.94236323887352e-05, "loss": 0.8399, "step": 193930 }, { "epoch": 3.4049052827472392, "grad_norm": 0.04177580701607192, "learning_rate": 4.941412081259803e-05, "loss": 0.8497, "step": 193940 }, { "epoch": 3.4050808476272407, "grad_norm": 0.05893963487951595, "learning_rate": 4.940460989006381e-05, "loss": 0.8461, "step": 193950 }, { "epoch": 3.405256412507242, "grad_norm": 0.051573562437512835, "learning_rate": 4.939509962125307e-05, "loss": 0.8449, "step": 193960 }, { "epoch": 3.4054319773872432, "grad_norm": 0.051165152234904275, "learning_rate": 4.9385590006286295e-05, "loss": 0.8378, "step": 193970 }, { "epoch": 3.4056075422672447, "grad_norm": 0.06714960078065206, "learning_rate": 4.937608104528404e-05, "loss": 0.8451, "step": 193980 }, { "epoch": 3.405783107147246, "grad_norm": 0.04800993719022226, "learning_rate": 4.936657273836669e-05, "loss": 0.8407, "step": 193990 }, { "epoch": 3.4059586720272477, "grad_norm": 0.05672136971746648, "learning_rate": 4.935706508565475e-05, "loss": 0.8367, "step": 194000 }, { "epoch": 3.406134236907249, "grad_norm": 0.05552057959787185, "learning_rate": 4.934755808726875e-05, "loss": 0.8486, "step": 194010 }, { "epoch": 3.4063098017872506, "grad_norm": 0.04570459502933644, "learning_rate": 4.933805174332903e-05, "loss": 0.8395, "step": 194020 }, { "epoch": 3.406485366667252, "grad_norm": 0.045660415862172024, "learning_rate": 4.93285460539561e-05, "loss": 0.8411, "step": 194030 }, { "epoch": 3.406660931547253, "grad_norm": 0.06806662084552341, "learning_rate": 4.93190410192704e-05, "loss": 0.8436, "step": 194040 }, { "epoch": 3.4068364964272546, "grad_norm": 0.0487952080070197, "learning_rate": 4.9309536639392386e-05, "loss": 0.854, "step": 194050 }, { "epoch": 3.407012061307256, "grad_norm": 0.04390802958731697, "learning_rate": 4.930003291444245e-05, "loss": 0.8413, "step": 194060 }, { "epoch": 3.4071876261872576, "grad_norm": 0.04966191170370364, "learning_rate": 4.929052984454094e-05, "loss": 0.8341, "step": 194070 }, { "epoch": 3.407363191067259, "grad_norm": 0.04569787000467765, "learning_rate": 4.928102742980839e-05, "loss": 0.8355, "step": 194080 }, { "epoch": 3.40753875594726, "grad_norm": 0.06412421520239515, "learning_rate": 4.927152567036511e-05, "loss": 0.847, "step": 194090 }, { "epoch": 3.4077143208272616, "grad_norm": 0.04521563782951533, "learning_rate": 4.9262024566331496e-05, "loss": 0.8432, "step": 194100 }, { "epoch": 3.407889885707263, "grad_norm": 0.05007326371474944, "learning_rate": 4.9252524117828004e-05, "loss": 0.8477, "step": 194110 }, { "epoch": 3.4080654505872645, "grad_norm": 0.05258204075683385, "learning_rate": 4.92430243249749e-05, "loss": 0.8345, "step": 194120 }, { "epoch": 3.408241015467266, "grad_norm": 0.04695565041007934, "learning_rate": 4.923352518789259e-05, "loss": 0.8444, "step": 194130 }, { "epoch": 3.4084165803472675, "grad_norm": 0.061998809247954485, "learning_rate": 4.922402670670145e-05, "loss": 0.8427, "step": 194140 }, { "epoch": 3.408592145227269, "grad_norm": 0.04932292675704862, "learning_rate": 4.921452888152184e-05, "loss": 0.8395, "step": 194150 }, { "epoch": 3.40876771010727, "grad_norm": 0.05564956981266433, "learning_rate": 4.920503171247405e-05, "loss": 0.8425, "step": 194160 }, { "epoch": 3.4089432749872715, "grad_norm": 0.101039056404874, "learning_rate": 4.9195535199678425e-05, "loss": 0.8356, "step": 194170 }, { "epoch": 3.409118839867273, "grad_norm": 0.07848125134755632, "learning_rate": 4.918603934325534e-05, "loss": 0.8362, "step": 194180 }, { "epoch": 3.4092944047472744, "grad_norm": 0.050206865611582274, "learning_rate": 4.917654414332502e-05, "loss": 0.8401, "step": 194190 }, { "epoch": 3.409469969627276, "grad_norm": 0.0536346011766557, "learning_rate": 4.916704960000782e-05, "loss": 0.846, "step": 194200 }, { "epoch": 3.409645534507277, "grad_norm": 0.05027977291654508, "learning_rate": 4.915755571342404e-05, "loss": 0.8376, "step": 194210 }, { "epoch": 3.4098210993872784, "grad_norm": 0.07664927121350057, "learning_rate": 4.9148062483694005e-05, "loss": 0.8458, "step": 194220 }, { "epoch": 3.40999666426728, "grad_norm": 0.055411071907644494, "learning_rate": 4.913856991093796e-05, "loss": 0.8504, "step": 194230 }, { "epoch": 3.4101722291472814, "grad_norm": 0.05635288004619002, "learning_rate": 4.912907799527608e-05, "loss": 0.8452, "step": 194240 }, { "epoch": 3.410347794027283, "grad_norm": 0.0475562236678883, "learning_rate": 4.9119586736828816e-05, "loss": 0.8442, "step": 194250 }, { "epoch": 3.4105233589072843, "grad_norm": 0.06464925628476453, "learning_rate": 4.911009613571628e-05, "loss": 0.8404, "step": 194260 }, { "epoch": 3.410698923787286, "grad_norm": 0.08472724851545443, "learning_rate": 4.9100606192058777e-05, "loss": 0.8348, "step": 194270 }, { "epoch": 3.410874488667287, "grad_norm": 0.054074708314567475, "learning_rate": 4.909111690597658e-05, "loss": 0.8433, "step": 194280 }, { "epoch": 3.4110500535472883, "grad_norm": 0.06155282216920898, "learning_rate": 4.908162827758984e-05, "loss": 0.8363, "step": 194290 }, { "epoch": 3.4112256184272898, "grad_norm": 0.04884427785058404, "learning_rate": 4.907214030701882e-05, "loss": 0.8352, "step": 194300 }, { "epoch": 3.4114011833072913, "grad_norm": 0.05491351603754522, "learning_rate": 4.906265299438372e-05, "loss": 0.8386, "step": 194310 }, { "epoch": 3.4115767481872927, "grad_norm": 0.048733291085006014, "learning_rate": 4.9053166339804824e-05, "loss": 0.8403, "step": 194320 }, { "epoch": 3.411752313067294, "grad_norm": 0.059772447649364464, "learning_rate": 4.904368034340222e-05, "loss": 0.842, "step": 194330 }, { "epoch": 3.4119278779472952, "grad_norm": 0.058232426078806666, "learning_rate": 4.903419500529612e-05, "loss": 0.8475, "step": 194340 }, { "epoch": 3.4121034428272967, "grad_norm": 0.05878455620358811, "learning_rate": 4.9024710325606796e-05, "loss": 0.8336, "step": 194350 }, { "epoch": 3.412279007707298, "grad_norm": 0.052351180578817415, "learning_rate": 4.9015226304454306e-05, "loss": 0.8342, "step": 194360 }, { "epoch": 3.4124545725872997, "grad_norm": 0.07931606680940201, "learning_rate": 4.9005742941958865e-05, "loss": 0.8405, "step": 194370 }, { "epoch": 3.412630137467301, "grad_norm": 0.05738990893547347, "learning_rate": 4.8996260238240615e-05, "loss": 0.8399, "step": 194380 }, { "epoch": 3.4128057023473026, "grad_norm": 0.04784742571584301, "learning_rate": 4.898677819341975e-05, "loss": 0.8376, "step": 194390 }, { "epoch": 3.4129812672273037, "grad_norm": 0.047129685299179574, "learning_rate": 4.897729680761638e-05, "loss": 0.8395, "step": 194400 }, { "epoch": 3.413156832107305, "grad_norm": 0.04876514712165687, "learning_rate": 4.8967816080950565e-05, "loss": 0.8327, "step": 194410 }, { "epoch": 3.4133323969873066, "grad_norm": 0.05637000800951124, "learning_rate": 4.895833601354255e-05, "loss": 0.8455, "step": 194420 }, { "epoch": 3.413507961867308, "grad_norm": 0.050729013408154944, "learning_rate": 4.8948856605512366e-05, "loss": 0.8437, "step": 194430 }, { "epoch": 3.4136835267473096, "grad_norm": 0.05634100706276529, "learning_rate": 4.893937785698015e-05, "loss": 0.8419, "step": 194440 }, { "epoch": 3.413859091627311, "grad_norm": 0.04484919277824969, "learning_rate": 4.8929899768066015e-05, "loss": 0.8382, "step": 194450 }, { "epoch": 3.4140346565073125, "grad_norm": 0.06979831523679456, "learning_rate": 4.892042233889e-05, "loss": 0.8401, "step": 194460 }, { "epoch": 3.4142102213873136, "grad_norm": 0.04718474806674701, "learning_rate": 4.891094556957222e-05, "loss": 0.8404, "step": 194470 }, { "epoch": 3.414385786267315, "grad_norm": 0.05952625607939998, "learning_rate": 4.890146946023273e-05, "loss": 0.8342, "step": 194480 }, { "epoch": 3.4145613511473165, "grad_norm": 0.06205361297454049, "learning_rate": 4.889199401099165e-05, "loss": 0.8312, "step": 194490 }, { "epoch": 3.414736916027318, "grad_norm": 0.05501928028854676, "learning_rate": 4.888251922196895e-05, "loss": 0.8455, "step": 194500 }, { "epoch": 3.4149124809073195, "grad_norm": 0.05157517778503616, "learning_rate": 4.887304509328472e-05, "loss": 0.835, "step": 194510 }, { "epoch": 3.4150880457873205, "grad_norm": 0.048002968688585966, "learning_rate": 4.8863571625059044e-05, "loss": 0.8377, "step": 194520 }, { "epoch": 3.415263610667322, "grad_norm": 0.04883995820291457, "learning_rate": 4.885409881741188e-05, "loss": 0.8424, "step": 194530 }, { "epoch": 3.4154391755473235, "grad_norm": 0.060038827219895584, "learning_rate": 4.884462667046325e-05, "loss": 0.8374, "step": 194540 }, { "epoch": 3.415614740427325, "grad_norm": 0.057890659626191984, "learning_rate": 4.883515518433321e-05, "loss": 0.839, "step": 194550 }, { "epoch": 3.4157903053073264, "grad_norm": 0.056656630810105024, "learning_rate": 4.882568435914179e-05, "loss": 0.8424, "step": 194560 }, { "epoch": 3.415965870187328, "grad_norm": 0.0430064859043324, "learning_rate": 4.881621419500895e-05, "loss": 0.8415, "step": 194570 }, { "epoch": 3.4161414350673294, "grad_norm": 0.04798302698982355, "learning_rate": 4.8806744692054604e-05, "loss": 0.8428, "step": 194580 }, { "epoch": 3.4163169999473304, "grad_norm": 0.05283042920216587, "learning_rate": 4.8797275850398866e-05, "loss": 0.8475, "step": 194590 }, { "epoch": 3.416492564827332, "grad_norm": 0.04920837852756628, "learning_rate": 4.8787807670161626e-05, "loss": 0.8499, "step": 194600 }, { "epoch": 3.4166681297073334, "grad_norm": 0.06735709885035475, "learning_rate": 4.877834015146286e-05, "loss": 0.8413, "step": 194610 }, { "epoch": 3.416843694587335, "grad_norm": 0.047733402939448624, "learning_rate": 4.876887329442258e-05, "loss": 0.8409, "step": 194620 }, { "epoch": 3.4170192594673363, "grad_norm": 0.05939315633600334, "learning_rate": 4.875940709916064e-05, "loss": 0.8351, "step": 194630 }, { "epoch": 3.4171948243473373, "grad_norm": 0.04477995753841529, "learning_rate": 4.8749941565797014e-05, "loss": 0.8404, "step": 194640 }, { "epoch": 3.417370389227339, "grad_norm": 0.04693209400527131, "learning_rate": 4.874047669445166e-05, "loss": 0.8414, "step": 194650 }, { "epoch": 3.4175459541073403, "grad_norm": 0.055605761216834936, "learning_rate": 4.873101248524451e-05, "loss": 0.8374, "step": 194660 }, { "epoch": 3.4177215189873418, "grad_norm": 0.0565069692518219, "learning_rate": 4.872154893829541e-05, "loss": 0.8438, "step": 194670 }, { "epoch": 3.4178970838673433, "grad_norm": 0.05132572348646881, "learning_rate": 4.871208605372431e-05, "loss": 0.8434, "step": 194680 }, { "epoch": 3.4180726487473447, "grad_norm": 0.05425708830610856, "learning_rate": 4.870262383165114e-05, "loss": 0.8414, "step": 194690 }, { "epoch": 3.418248213627346, "grad_norm": 0.05085700944977528, "learning_rate": 4.869316227219571e-05, "loss": 0.8444, "step": 194700 }, { "epoch": 3.4184237785073472, "grad_norm": 0.04768102503033974, "learning_rate": 4.8683701375477934e-05, "loss": 0.8433, "step": 194710 }, { "epoch": 3.4185993433873487, "grad_norm": 0.045619912367383164, "learning_rate": 4.867424114161769e-05, "loss": 0.8401, "step": 194720 }, { "epoch": 3.41877490826735, "grad_norm": 0.05250187397648932, "learning_rate": 4.8664781570734876e-05, "loss": 0.8334, "step": 194730 }, { "epoch": 3.4189504731473517, "grad_norm": 0.05177062508511226, "learning_rate": 4.8655322662949305e-05, "loss": 0.8342, "step": 194740 }, { "epoch": 3.419126038027353, "grad_norm": 0.05890094603461449, "learning_rate": 4.864586441838076e-05, "loss": 0.8467, "step": 194750 }, { "epoch": 3.419301602907354, "grad_norm": 0.06424506974306896, "learning_rate": 4.863640683714922e-05, "loss": 0.8426, "step": 194760 }, { "epoch": 3.4194771677873557, "grad_norm": 0.055414083047634004, "learning_rate": 4.86269499193744e-05, "loss": 0.8402, "step": 194770 }, { "epoch": 3.419652732667357, "grad_norm": 0.053295872946973855, "learning_rate": 4.861749366517617e-05, "loss": 0.846, "step": 194780 }, { "epoch": 3.4198282975473586, "grad_norm": 0.04674748020581778, "learning_rate": 4.8608038074674354e-05, "loss": 0.8407, "step": 194790 }, { "epoch": 3.42000386242736, "grad_norm": 0.057666588183842, "learning_rate": 4.859858314798871e-05, "loss": 0.8298, "step": 194800 }, { "epoch": 3.4201794273073616, "grad_norm": 0.05249880687976786, "learning_rate": 4.858912888523907e-05, "loss": 0.8409, "step": 194810 }, { "epoch": 3.420354992187363, "grad_norm": 0.048379581351801995, "learning_rate": 4.85796752865452e-05, "loss": 0.8359, "step": 194820 }, { "epoch": 3.420530557067364, "grad_norm": 0.0673837772401913, "learning_rate": 4.8570222352026925e-05, "loss": 0.8449, "step": 194830 }, { "epoch": 3.4207061219473656, "grad_norm": 0.05190546040459106, "learning_rate": 4.856077008180396e-05, "loss": 0.8435, "step": 194840 }, { "epoch": 3.420881686827367, "grad_norm": 0.057264249068045586, "learning_rate": 4.855131847599607e-05, "loss": 0.8409, "step": 194850 }, { "epoch": 3.4210572517073685, "grad_norm": 0.05425030822717686, "learning_rate": 4.854186753472307e-05, "loss": 0.8411, "step": 194860 }, { "epoch": 3.42123281658737, "grad_norm": 0.06180279839331746, "learning_rate": 4.853241725810463e-05, "loss": 0.837, "step": 194870 }, { "epoch": 3.421408381467371, "grad_norm": 0.061990260510265065, "learning_rate": 4.852296764626051e-05, "loss": 0.8411, "step": 194880 }, { "epoch": 3.4215839463473725, "grad_norm": 0.04812109198774781, "learning_rate": 4.851351869931045e-05, "loss": 0.8414, "step": 194890 }, { "epoch": 3.421759511227374, "grad_norm": 0.07693944249314111, "learning_rate": 4.850407041737421e-05, "loss": 0.8403, "step": 194900 }, { "epoch": 3.4219350761073755, "grad_norm": 0.04709764199890573, "learning_rate": 4.8494622800571444e-05, "loss": 0.8411, "step": 194910 }, { "epoch": 3.422110640987377, "grad_norm": 0.05353823705039114, "learning_rate": 4.848517584902181e-05, "loss": 0.8333, "step": 194920 }, { "epoch": 3.4222862058673784, "grad_norm": 0.06824954355173168, "learning_rate": 4.847572956284513e-05, "loss": 0.8386, "step": 194930 }, { "epoch": 3.42246177074738, "grad_norm": 0.042784820947494605, "learning_rate": 4.846628394216099e-05, "loss": 0.8474, "step": 194940 }, { "epoch": 3.422637335627381, "grad_norm": 0.046568957649862144, "learning_rate": 4.84568389870891e-05, "loss": 0.8366, "step": 194950 }, { "epoch": 3.4228129005073824, "grad_norm": 0.060393039520353894, "learning_rate": 4.844739469774917e-05, "loss": 0.8341, "step": 194960 }, { "epoch": 3.422988465387384, "grad_norm": 0.04581634350567502, "learning_rate": 4.843795107426077e-05, "loss": 0.8391, "step": 194970 }, { "epoch": 3.4231640302673854, "grad_norm": 0.05587560961102423, "learning_rate": 4.842850811674362e-05, "loss": 0.8343, "step": 194980 }, { "epoch": 3.423339595147387, "grad_norm": 0.057922071913518024, "learning_rate": 4.841906582531733e-05, "loss": 0.8402, "step": 194990 }, { "epoch": 3.4235151600273883, "grad_norm": 0.05190922930097808, "learning_rate": 4.84096242001016e-05, "loss": 0.8349, "step": 195000 }, { "epoch": 3.4236907249073893, "grad_norm": 0.05701682514681516, "learning_rate": 4.8400183241215974e-05, "loss": 0.8434, "step": 195010 }, { "epoch": 3.423866289787391, "grad_norm": 0.046650975848696936, "learning_rate": 4.83907429487801e-05, "loss": 0.8356, "step": 195020 }, { "epoch": 3.4240418546673923, "grad_norm": 0.04575022247750066, "learning_rate": 4.8381303322913644e-05, "loss": 0.8419, "step": 195030 }, { "epoch": 3.424217419547394, "grad_norm": 0.06077820160161449, "learning_rate": 4.8371864363736126e-05, "loss": 0.8391, "step": 195040 }, { "epoch": 3.4243929844273953, "grad_norm": 0.08785340878414707, "learning_rate": 4.8362426071367155e-05, "loss": 0.83, "step": 195050 }, { "epoch": 3.4245685493073967, "grad_norm": 0.0557000807820804, "learning_rate": 4.835298844592634e-05, "loss": 0.8451, "step": 195060 }, { "epoch": 3.4247441141873978, "grad_norm": 0.05149313680540499, "learning_rate": 4.83435514875333e-05, "loss": 0.8349, "step": 195070 }, { "epoch": 3.4249196790673992, "grad_norm": 0.07391155601493339, "learning_rate": 4.833411519630753e-05, "loss": 0.8485, "step": 195080 }, { "epoch": 3.4250952439474007, "grad_norm": 0.07162210382336336, "learning_rate": 4.832467957236857e-05, "loss": 0.8429, "step": 195090 }, { "epoch": 3.425270808827402, "grad_norm": 0.052125452458997024, "learning_rate": 4.831524461583607e-05, "loss": 0.8401, "step": 195100 }, { "epoch": 3.4254463737074037, "grad_norm": 0.06652538895620907, "learning_rate": 4.830581032682948e-05, "loss": 0.8403, "step": 195110 }, { "epoch": 3.425621938587405, "grad_norm": 0.05933363545835157, "learning_rate": 4.829637670546838e-05, "loss": 0.8433, "step": 195120 }, { "epoch": 3.425797503467406, "grad_norm": 0.06485448157837424, "learning_rate": 4.8286943751872324e-05, "loss": 0.8412, "step": 195130 }, { "epoch": 3.4259730683474077, "grad_norm": 0.06090682955754023, "learning_rate": 4.8277511466160744e-05, "loss": 0.8342, "step": 195140 }, { "epoch": 3.426148633227409, "grad_norm": 0.047449742914494705, "learning_rate": 4.826807984845321e-05, "loss": 0.8361, "step": 195150 }, { "epoch": 3.4263241981074106, "grad_norm": 0.06277850911487405, "learning_rate": 4.82586488988692e-05, "loss": 0.8434, "step": 195160 }, { "epoch": 3.426499762987412, "grad_norm": 0.06951774744109317, "learning_rate": 4.824921861752826e-05, "loss": 0.8376, "step": 195170 }, { "epoch": 3.4266753278674136, "grad_norm": 0.0629453914179549, "learning_rate": 4.8239789004549795e-05, "loss": 0.8352, "step": 195180 }, { "epoch": 3.4268508927474146, "grad_norm": 0.0541482462702964, "learning_rate": 4.823036006005332e-05, "loss": 0.8344, "step": 195190 }, { "epoch": 3.427026457627416, "grad_norm": 0.04514016745408344, "learning_rate": 4.822093178415832e-05, "loss": 0.8439, "step": 195200 }, { "epoch": 3.4272020225074176, "grad_norm": 0.05954742496296953, "learning_rate": 4.821150417698421e-05, "loss": 0.8397, "step": 195210 }, { "epoch": 3.427377587387419, "grad_norm": 0.06233899092567859, "learning_rate": 4.8202077238650436e-05, "loss": 0.8446, "step": 195220 }, { "epoch": 3.4275531522674205, "grad_norm": 0.044591043127678635, "learning_rate": 4.819265096927648e-05, "loss": 0.8502, "step": 195230 }, { "epoch": 3.427728717147422, "grad_norm": 0.05697036825598534, "learning_rate": 4.818322536898178e-05, "loss": 0.8526, "step": 195240 }, { "epoch": 3.4279042820274235, "grad_norm": 0.054868480391695076, "learning_rate": 4.817380043788575e-05, "loss": 0.8398, "step": 195250 }, { "epoch": 3.4280798469074245, "grad_norm": 0.04701127444935511, "learning_rate": 4.81643761761077e-05, "loss": 0.8373, "step": 195260 }, { "epoch": 3.428255411787426, "grad_norm": 0.05398004645174588, "learning_rate": 4.815495258376722e-05, "loss": 0.8386, "step": 195270 }, { "epoch": 3.4284309766674275, "grad_norm": 0.050123685950903496, "learning_rate": 4.814552966098358e-05, "loss": 0.8433, "step": 195280 }, { "epoch": 3.428606541547429, "grad_norm": 0.046196841788085065, "learning_rate": 4.8136107407876214e-05, "loss": 0.8343, "step": 195290 }, { "epoch": 3.4287821064274304, "grad_norm": 0.05422395952653248, "learning_rate": 4.812668582456452e-05, "loss": 0.8419, "step": 195300 }, { "epoch": 3.4289576713074315, "grad_norm": 0.050951414224088734, "learning_rate": 4.8117264911167825e-05, "loss": 0.8331, "step": 195310 }, { "epoch": 3.429133236187433, "grad_norm": 0.0658408846831527, "learning_rate": 4.810784466780551e-05, "loss": 0.8398, "step": 195320 }, { "epoch": 3.4293088010674344, "grad_norm": 0.0643751820714155, "learning_rate": 4.8098425094596946e-05, "loss": 0.846, "step": 195330 }, { "epoch": 3.429484365947436, "grad_norm": 0.051621481679089205, "learning_rate": 4.808900619166151e-05, "loss": 0.8398, "step": 195340 }, { "epoch": 3.4296599308274374, "grad_norm": 0.06700982726631366, "learning_rate": 4.807958795911846e-05, "loss": 0.8359, "step": 195350 }, { "epoch": 3.429835495707439, "grad_norm": 0.07047512680327787, "learning_rate": 4.8070170397087186e-05, "loss": 0.8416, "step": 195360 }, { "epoch": 3.4300110605874403, "grad_norm": 0.04884597009931388, "learning_rate": 4.8060753505687035e-05, "loss": 0.8351, "step": 195370 }, { "epoch": 3.4301866254674414, "grad_norm": 0.05717647041834128, "learning_rate": 4.805133728503723e-05, "loss": 0.8368, "step": 195380 }, { "epoch": 3.430362190347443, "grad_norm": 0.07469041079892912, "learning_rate": 4.804192173525714e-05, "loss": 0.838, "step": 195390 }, { "epoch": 3.4305377552274443, "grad_norm": 0.04795723233212944, "learning_rate": 4.803250685646605e-05, "loss": 0.8362, "step": 195400 }, { "epoch": 3.430713320107446, "grad_norm": 0.05576036010433958, "learning_rate": 4.802309264878328e-05, "loss": 0.8372, "step": 195410 }, { "epoch": 3.4308888849874473, "grad_norm": 0.055811944417536126, "learning_rate": 4.8013679112328095e-05, "loss": 0.839, "step": 195420 }, { "epoch": 3.4310644498674483, "grad_norm": 0.047003374260906006, "learning_rate": 4.8004266247219675e-05, "loss": 0.8381, "step": 195430 }, { "epoch": 3.4312400147474498, "grad_norm": 0.07456788103153786, "learning_rate": 4.799485405357742e-05, "loss": 0.8421, "step": 195440 }, { "epoch": 3.4314155796274513, "grad_norm": 0.051650507051066476, "learning_rate": 4.798544253152049e-05, "loss": 0.8379, "step": 195450 }, { "epoch": 3.4315911445074527, "grad_norm": 0.05744934395177363, "learning_rate": 4.797603168116816e-05, "loss": 0.8382, "step": 195460 }, { "epoch": 3.431766709387454, "grad_norm": 0.062105895781404935, "learning_rate": 4.796662150263971e-05, "loss": 0.8416, "step": 195470 }, { "epoch": 3.4319422742674557, "grad_norm": 0.05637915698492982, "learning_rate": 4.795721199605429e-05, "loss": 0.8398, "step": 195480 }, { "epoch": 3.432117839147457, "grad_norm": 0.048135043148526985, "learning_rate": 4.794780316153117e-05, "loss": 0.838, "step": 195490 }, { "epoch": 3.432293404027458, "grad_norm": 0.07528183078428438, "learning_rate": 4.793839499918954e-05, "loss": 0.8474, "step": 195500 }, { "epoch": 3.4324689689074597, "grad_norm": 0.05535158305260257, "learning_rate": 4.792898750914865e-05, "loss": 0.8391, "step": 195510 }, { "epoch": 3.432644533787461, "grad_norm": 0.05524466768095527, "learning_rate": 4.791958069152763e-05, "loss": 0.8417, "step": 195520 }, { "epoch": 3.4328200986674626, "grad_norm": 0.07073897902691206, "learning_rate": 4.79101745464457e-05, "loss": 0.838, "step": 195530 }, { "epoch": 3.432995663547464, "grad_norm": 0.058288394907415726, "learning_rate": 4.790076907402206e-05, "loss": 0.8384, "step": 195540 }, { "epoch": 3.433171228427465, "grad_norm": 0.04905045728177045, "learning_rate": 4.789136427437581e-05, "loss": 0.8483, "step": 195550 }, { "epoch": 3.4333467933074666, "grad_norm": 0.04756830470402652, "learning_rate": 4.788196014762617e-05, "loss": 0.8532, "step": 195560 }, { "epoch": 3.433522358187468, "grad_norm": 0.06862967459692124, "learning_rate": 4.7872556693892276e-05, "loss": 0.8458, "step": 195570 }, { "epoch": 3.4336979230674696, "grad_norm": 0.04916937728346072, "learning_rate": 4.786315391329329e-05, "loss": 0.835, "step": 195580 }, { "epoch": 3.433873487947471, "grad_norm": 0.051559450701315304, "learning_rate": 4.785375180594833e-05, "loss": 0.8416, "step": 195590 }, { "epoch": 3.4340490528274725, "grad_norm": 0.04864028691167062, "learning_rate": 4.784435037197645e-05, "loss": 0.8418, "step": 195600 }, { "epoch": 3.434224617707474, "grad_norm": 0.06434298584824033, "learning_rate": 4.783494961149692e-05, "loss": 0.8449, "step": 195610 }, { "epoch": 3.434400182587475, "grad_norm": 0.05087676709519241, "learning_rate": 4.7825549524628727e-05, "loss": 0.8433, "step": 195620 }, { "epoch": 3.4345757474674765, "grad_norm": 0.0523816371896874, "learning_rate": 4.781615011149101e-05, "loss": 0.834, "step": 195630 }, { "epoch": 3.434751312347478, "grad_norm": 0.053997021619054765, "learning_rate": 4.7806751372202896e-05, "loss": 0.8433, "step": 195640 }, { "epoch": 3.4349268772274795, "grad_norm": 0.06409044539296894, "learning_rate": 4.779735330688341e-05, "loss": 0.8423, "step": 195650 }, { "epoch": 3.435102442107481, "grad_norm": 0.05025471632546948, "learning_rate": 4.7787955915651636e-05, "loss": 0.8449, "step": 195660 }, { "epoch": 3.435278006987482, "grad_norm": 0.07473351660979602, "learning_rate": 4.7778559198626676e-05, "loss": 0.8405, "step": 195670 }, { "epoch": 3.4354535718674835, "grad_norm": 0.050089418911037256, "learning_rate": 4.7769163155927594e-05, "loss": 0.841, "step": 195680 }, { "epoch": 3.435629136747485, "grad_norm": 0.06221706993197521, "learning_rate": 4.775976778767339e-05, "loss": 0.8344, "step": 195690 }, { "epoch": 3.4358047016274864, "grad_norm": 0.043577046598010906, "learning_rate": 4.775037309398314e-05, "loss": 0.8432, "step": 195700 }, { "epoch": 3.435980266507488, "grad_norm": 0.057288859726025355, "learning_rate": 4.774097907497589e-05, "loss": 0.8357, "step": 195710 }, { "epoch": 3.4361558313874894, "grad_norm": 0.056726636328687685, "learning_rate": 4.773158573077061e-05, "loss": 0.8505, "step": 195720 }, { "epoch": 3.436331396267491, "grad_norm": 0.057695190027559295, "learning_rate": 4.772219306148635e-05, "loss": 0.8394, "step": 195730 }, { "epoch": 3.436506961147492, "grad_norm": 0.054912591726116795, "learning_rate": 4.771280106724211e-05, "loss": 0.8464, "step": 195740 }, { "epoch": 3.4366825260274934, "grad_norm": 0.04445308374417714, "learning_rate": 4.770340974815693e-05, "loss": 0.8463, "step": 195750 }, { "epoch": 3.436858090907495, "grad_norm": 0.05163489760046149, "learning_rate": 4.769401910434976e-05, "loss": 0.8438, "step": 195760 }, { "epoch": 3.4370336557874963, "grad_norm": 0.04276883526229845, "learning_rate": 4.768462913593951e-05, "loss": 0.836, "step": 195770 }, { "epoch": 3.437209220667498, "grad_norm": 0.04878310892940647, "learning_rate": 4.7675239843045306e-05, "loss": 0.8442, "step": 195780 }, { "epoch": 3.4373847855474993, "grad_norm": 0.045149757255204724, "learning_rate": 4.7665851225786e-05, "loss": 0.8427, "step": 195790 }, { "epoch": 3.4375603504275003, "grad_norm": 0.04963214359589337, "learning_rate": 4.765646328428057e-05, "loss": 0.8435, "step": 195800 }, { "epoch": 3.4377359153075018, "grad_norm": 0.07095920634052655, "learning_rate": 4.764707601864802e-05, "loss": 0.8481, "step": 195810 }, { "epoch": 3.4379114801875033, "grad_norm": 0.053894272493810826, "learning_rate": 4.7637689429007195e-05, "loss": 0.8399, "step": 195820 }, { "epoch": 3.4380870450675047, "grad_norm": 0.05352757809279118, "learning_rate": 4.7628303515477087e-05, "loss": 0.8424, "step": 195830 }, { "epoch": 3.438262609947506, "grad_norm": 0.05124728833010432, "learning_rate": 4.7618918278176586e-05, "loss": 0.8393, "step": 195840 }, { "epoch": 3.4384381748275077, "grad_norm": 0.043389208151813546, "learning_rate": 4.760953371722467e-05, "loss": 0.8404, "step": 195850 }, { "epoch": 3.4386137397075087, "grad_norm": 0.05567992194392075, "learning_rate": 4.7600149832740146e-05, "loss": 0.8446, "step": 195860 }, { "epoch": 3.43878930458751, "grad_norm": 0.04514571405631952, "learning_rate": 4.759076662484196e-05, "loss": 0.8348, "step": 195870 }, { "epoch": 3.4389648694675117, "grad_norm": 0.05182183789313628, "learning_rate": 4.758138409364904e-05, "loss": 0.8443, "step": 195880 }, { "epoch": 3.439140434347513, "grad_norm": 0.0445425849512007, "learning_rate": 4.7572002239280184e-05, "loss": 0.8329, "step": 195890 }, { "epoch": 3.4393159992275146, "grad_norm": 0.05118467204140835, "learning_rate": 4.7562621061854285e-05, "loss": 0.8335, "step": 195900 }, { "epoch": 3.439491564107516, "grad_norm": 0.05522424554132841, "learning_rate": 4.755324056149023e-05, "loss": 0.8362, "step": 195910 }, { "epoch": 3.4396671289875176, "grad_norm": 0.05911025675173855, "learning_rate": 4.75438607383069e-05, "loss": 0.8491, "step": 195920 }, { "epoch": 3.4398426938675186, "grad_norm": 0.06491560103306272, "learning_rate": 4.7534481592423095e-05, "loss": 0.8419, "step": 195930 }, { "epoch": 3.44001825874752, "grad_norm": 0.055971955233953, "learning_rate": 4.752510312395758e-05, "loss": 0.8391, "step": 195940 }, { "epoch": 3.4401938236275216, "grad_norm": 0.0525523249040413, "learning_rate": 4.751572533302933e-05, "loss": 0.8384, "step": 195950 }, { "epoch": 3.440369388507523, "grad_norm": 0.07371243797151461, "learning_rate": 4.7506348219757054e-05, "loss": 0.8452, "step": 195960 }, { "epoch": 3.4405449533875245, "grad_norm": 0.049010876443591006, "learning_rate": 4.74969717842596e-05, "loss": 0.8394, "step": 195970 }, { "epoch": 3.4407205182675256, "grad_norm": 0.050365371694553795, "learning_rate": 4.7487596026655807e-05, "loss": 0.8444, "step": 195980 }, { "epoch": 3.440896083147527, "grad_norm": 0.04437673014797061, "learning_rate": 4.747822094706439e-05, "loss": 0.832, "step": 195990 }, { "epoch": 3.4410716480275285, "grad_norm": 0.04632310477547041, "learning_rate": 4.7468846545604165e-05, "loss": 0.8538, "step": 196000 }, { "epoch": 3.44124721290753, "grad_norm": 0.054063253758333464, "learning_rate": 4.745947282239392e-05, "loss": 0.8366, "step": 196010 }, { "epoch": 3.4414227777875315, "grad_norm": 0.05141324482777217, "learning_rate": 4.7450099777552446e-05, "loss": 0.8383, "step": 196020 }, { "epoch": 3.441598342667533, "grad_norm": 0.06441951295374315, "learning_rate": 4.744072741119844e-05, "loss": 0.8396, "step": 196030 }, { "epoch": 3.4417739075475344, "grad_norm": 0.04980581422198285, "learning_rate": 4.7431355723450676e-05, "loss": 0.8467, "step": 196040 }, { "epoch": 3.4419494724275355, "grad_norm": 0.06909035193355492, "learning_rate": 4.742198471442795e-05, "loss": 0.8445, "step": 196050 }, { "epoch": 3.442125037307537, "grad_norm": 0.05312563478274068, "learning_rate": 4.741261438424889e-05, "loss": 0.8388, "step": 196060 }, { "epoch": 3.4423006021875384, "grad_norm": 0.06034812625925376, "learning_rate": 4.74032447330323e-05, "loss": 0.8385, "step": 196070 }, { "epoch": 3.44247616706754, "grad_norm": 0.05454329797756831, "learning_rate": 4.739387576089686e-05, "loss": 0.839, "step": 196080 }, { "epoch": 3.4426517319475414, "grad_norm": 0.047508162415083995, "learning_rate": 4.7384507467961315e-05, "loss": 0.8394, "step": 196090 }, { "epoch": 3.4428272968275424, "grad_norm": 0.04556774746781447, "learning_rate": 4.737513985434434e-05, "loss": 0.8412, "step": 196100 }, { "epoch": 3.443002861707544, "grad_norm": 0.07462893905577377, "learning_rate": 4.736577292016454e-05, "loss": 0.8419, "step": 196110 }, { "epoch": 3.4431784265875454, "grad_norm": 0.04370697333452839, "learning_rate": 4.735640666554077e-05, "loss": 0.8401, "step": 196120 }, { "epoch": 3.443353991467547, "grad_norm": 0.047887530685747554, "learning_rate": 4.7347041090591554e-05, "loss": 0.8368, "step": 196130 }, { "epoch": 3.4435295563475483, "grad_norm": 0.062191522858930245, "learning_rate": 4.733767619543562e-05, "loss": 0.8458, "step": 196140 }, { "epoch": 3.44370512122755, "grad_norm": 0.05373495960876097, "learning_rate": 4.7328311980191645e-05, "loss": 0.8394, "step": 196150 }, { "epoch": 3.4438806861075513, "grad_norm": 0.061888888770178806, "learning_rate": 4.7318948444978205e-05, "loss": 0.8366, "step": 196160 }, { "epoch": 3.4440562509875523, "grad_norm": 0.06141804252310623, "learning_rate": 4.730958558991398e-05, "loss": 0.8391, "step": 196170 }, { "epoch": 3.444231815867554, "grad_norm": 0.05291101004948447, "learning_rate": 4.730022341511759e-05, "loss": 0.8409, "step": 196180 }, { "epoch": 3.4444073807475553, "grad_norm": 0.06445769257562677, "learning_rate": 4.729086192070771e-05, "loss": 0.8439, "step": 196190 }, { "epoch": 3.4445829456275567, "grad_norm": 0.05213811459021087, "learning_rate": 4.728150110680285e-05, "loss": 0.8447, "step": 196200 }, { "epoch": 3.444758510507558, "grad_norm": 0.04499456711928696, "learning_rate": 4.727214097352168e-05, "loss": 0.8481, "step": 196210 }, { "epoch": 3.4449340753875592, "grad_norm": 0.047617729597516686, "learning_rate": 4.7262781520982805e-05, "loss": 0.8395, "step": 196220 }, { "epoch": 3.4451096402675607, "grad_norm": 0.048279069395103076, "learning_rate": 4.7253422749304757e-05, "loss": 0.8402, "step": 196230 }, { "epoch": 3.445285205147562, "grad_norm": 0.051005390339278756, "learning_rate": 4.724406465860615e-05, "loss": 0.8344, "step": 196240 }, { "epoch": 3.4454607700275637, "grad_norm": 0.05708838074618044, "learning_rate": 4.723470724900553e-05, "loss": 0.8395, "step": 196250 }, { "epoch": 3.445636334907565, "grad_norm": 0.051541659780804716, "learning_rate": 4.7225350520621523e-05, "loss": 0.8361, "step": 196260 }, { "epoch": 3.4458118997875666, "grad_norm": 0.06477743159867472, "learning_rate": 4.721599447357263e-05, "loss": 0.8406, "step": 196270 }, { "epoch": 3.445987464667568, "grad_norm": 0.0508847071031361, "learning_rate": 4.720663910797731e-05, "loss": 0.8345, "step": 196280 }, { "epoch": 3.446163029547569, "grad_norm": 0.08930115206230974, "learning_rate": 4.719728442395427e-05, "loss": 0.8422, "step": 196290 }, { "epoch": 3.4463385944275706, "grad_norm": 0.046704295684666886, "learning_rate": 4.71879304216219e-05, "loss": 0.8461, "step": 196300 }, { "epoch": 3.446514159307572, "grad_norm": 0.04710873229804889, "learning_rate": 4.717857710109877e-05, "loss": 0.8475, "step": 196310 }, { "epoch": 3.4466897241875736, "grad_norm": 0.05018702913103075, "learning_rate": 4.716922446250342e-05, "loss": 0.8475, "step": 196320 }, { "epoch": 3.446865289067575, "grad_norm": 0.0553967653487441, "learning_rate": 4.7159872505954266e-05, "loss": 0.8421, "step": 196330 }, { "epoch": 3.447040853947576, "grad_norm": 0.04864246320372023, "learning_rate": 4.715052123156987e-05, "loss": 0.8358, "step": 196340 }, { "epoch": 3.4472164188275776, "grad_norm": 0.06620373069877032, "learning_rate": 4.714117063946866e-05, "loss": 0.849, "step": 196350 }, { "epoch": 3.447391983707579, "grad_norm": 0.046525808529272225, "learning_rate": 4.71318207297692e-05, "loss": 0.8388, "step": 196360 }, { "epoch": 3.4475675485875805, "grad_norm": 0.04705927569786345, "learning_rate": 4.712247150258984e-05, "loss": 0.8422, "step": 196370 }, { "epoch": 3.447743113467582, "grad_norm": 0.05772552187229953, "learning_rate": 4.711312295804911e-05, "loss": 0.8384, "step": 196380 }, { "epoch": 3.4479186783475835, "grad_norm": 0.051643297534160734, "learning_rate": 4.710377509626546e-05, "loss": 0.8388, "step": 196390 }, { "epoch": 3.448094243227585, "grad_norm": 0.0584911690633758, "learning_rate": 4.709442791735728e-05, "loss": 0.8431, "step": 196400 }, { "epoch": 3.448269808107586, "grad_norm": 0.051898878493141454, "learning_rate": 4.708508142144303e-05, "loss": 0.8371, "step": 196410 }, { "epoch": 3.4484453729875875, "grad_norm": 0.06232864448125314, "learning_rate": 4.7075735608641124e-05, "loss": 0.838, "step": 196420 }, { "epoch": 3.448620937867589, "grad_norm": 0.049372747459990145, "learning_rate": 4.706639047907003e-05, "loss": 0.839, "step": 196430 }, { "epoch": 3.4487965027475904, "grad_norm": 0.05156009275954762, "learning_rate": 4.70570460328481e-05, "loss": 0.8352, "step": 196440 }, { "epoch": 3.448972067627592, "grad_norm": 0.06461189657105007, "learning_rate": 4.704770227009366e-05, "loss": 0.8421, "step": 196450 }, { "epoch": 3.4491476325075934, "grad_norm": 0.05592807308659329, "learning_rate": 4.7038359190925246e-05, "loss": 0.8392, "step": 196460 }, { "epoch": 3.4493231973875944, "grad_norm": 0.06099622673717255, "learning_rate": 4.7029016795461126e-05, "loss": 0.8418, "step": 196470 }, { "epoch": 3.449498762267596, "grad_norm": 0.06404593913347874, "learning_rate": 4.70196750838197e-05, "loss": 0.8428, "step": 196480 }, { "epoch": 3.4496743271475974, "grad_norm": 0.049340386641492706, "learning_rate": 4.7010334056119395e-05, "loss": 0.8407, "step": 196490 }, { "epoch": 3.449849892027599, "grad_norm": 0.04806272534527263, "learning_rate": 4.700099371247845e-05, "loss": 0.8418, "step": 196500 }, { "epoch": 3.4500254569076003, "grad_norm": 0.05577182602799346, "learning_rate": 4.6991654053015264e-05, "loss": 0.8412, "step": 196510 }, { "epoch": 3.450201021787602, "grad_norm": 0.04550103383117711, "learning_rate": 4.698231507784817e-05, "loss": 0.8461, "step": 196520 }, { "epoch": 3.450376586667603, "grad_norm": 0.05628928862894513, "learning_rate": 4.6972976787095534e-05, "loss": 0.8419, "step": 196530 }, { "epoch": 3.4505521515476043, "grad_norm": 0.059417879332688696, "learning_rate": 4.6963639180875604e-05, "loss": 0.8455, "step": 196540 }, { "epoch": 3.450727716427606, "grad_norm": 0.04071837293223006, "learning_rate": 4.695430225930672e-05, "loss": 0.8475, "step": 196550 }, { "epoch": 3.4509032813076073, "grad_norm": 0.04563148913023968, "learning_rate": 4.694496602250722e-05, "loss": 0.843, "step": 196560 }, { "epoch": 3.4510788461876087, "grad_norm": 0.047642276694462124, "learning_rate": 4.693563047059533e-05, "loss": 0.8458, "step": 196570 }, { "epoch": 3.45125441106761, "grad_norm": 0.04807230949279478, "learning_rate": 4.692629560368936e-05, "loss": 0.8479, "step": 196580 }, { "epoch": 3.4514299759476113, "grad_norm": 0.05098251638104363, "learning_rate": 4.6916961421907584e-05, "loss": 0.8406, "step": 196590 }, { "epoch": 3.4516055408276127, "grad_norm": 0.04569414677247515, "learning_rate": 4.690762792536831e-05, "loss": 0.8404, "step": 196600 }, { "epoch": 3.451781105707614, "grad_norm": 0.04509444637648617, "learning_rate": 4.6898295114189756e-05, "loss": 0.844, "step": 196610 }, { "epoch": 3.4519566705876157, "grad_norm": 0.04706107458844503, "learning_rate": 4.68889629884901e-05, "loss": 0.8489, "step": 196620 }, { "epoch": 3.452132235467617, "grad_norm": 0.04755267533130521, "learning_rate": 4.687963154838772e-05, "loss": 0.8312, "step": 196630 }, { "epoch": 3.4523078003476186, "grad_norm": 0.051193961631097094, "learning_rate": 4.687030079400075e-05, "loss": 0.8426, "step": 196640 }, { "epoch": 3.4524833652276197, "grad_norm": 0.058307866021171155, "learning_rate": 4.686097072544745e-05, "loss": 0.8471, "step": 196650 }, { "epoch": 3.452658930107621, "grad_norm": 0.052165322170707705, "learning_rate": 4.685164134284606e-05, "loss": 0.8439, "step": 196660 }, { "epoch": 3.4528344949876226, "grad_norm": 0.04414161931597973, "learning_rate": 4.6842312646314704e-05, "loss": 0.8312, "step": 196670 }, { "epoch": 3.453010059867624, "grad_norm": 0.07136209495478579, "learning_rate": 4.683298463597167e-05, "loss": 0.8425, "step": 196680 }, { "epoch": 3.4531856247476256, "grad_norm": 0.04751907216936714, "learning_rate": 4.682365731193502e-05, "loss": 0.8556, "step": 196690 }, { "epoch": 3.453361189627627, "grad_norm": 0.06164353416216412, "learning_rate": 4.68143306743231e-05, "loss": 0.842, "step": 196700 }, { "epoch": 3.4535367545076285, "grad_norm": 0.06360148100487617, "learning_rate": 4.680500472325396e-05, "loss": 0.8381, "step": 196710 }, { "epoch": 3.4537123193876296, "grad_norm": 0.0562209153493164, "learning_rate": 4.679567945884579e-05, "loss": 0.8393, "step": 196720 }, { "epoch": 3.453887884267631, "grad_norm": 0.07963791417975098, "learning_rate": 4.678635488121678e-05, "loss": 0.8433, "step": 196730 }, { "epoch": 3.4540634491476325, "grad_norm": 0.05287333998268147, "learning_rate": 4.677703099048502e-05, "loss": 0.844, "step": 196740 }, { "epoch": 3.454239014027634, "grad_norm": 0.05098885843722229, "learning_rate": 4.676770778676866e-05, "loss": 0.8387, "step": 196750 }, { "epoch": 3.4544145789076355, "grad_norm": 0.05187533973273375, "learning_rate": 4.675838527018584e-05, "loss": 0.8301, "step": 196760 }, { "epoch": 3.4545901437876365, "grad_norm": 0.06708632338762434, "learning_rate": 4.674906344085471e-05, "loss": 0.8391, "step": 196770 }, { "epoch": 3.454765708667638, "grad_norm": 0.05054505242463005, "learning_rate": 4.673974229889334e-05, "loss": 0.8462, "step": 196780 }, { "epoch": 3.4549412735476395, "grad_norm": 0.060993797276931376, "learning_rate": 4.673042184441977e-05, "loss": 0.8423, "step": 196790 }, { "epoch": 3.455116838427641, "grad_norm": 0.052402590478197494, "learning_rate": 4.6721102077552224e-05, "loss": 0.8372, "step": 196800 }, { "epoch": 3.4552924033076424, "grad_norm": 0.06343189421177822, "learning_rate": 4.671178299840867e-05, "loss": 0.8365, "step": 196810 }, { "epoch": 3.455467968187644, "grad_norm": 0.05313346190997617, "learning_rate": 4.6702464607107243e-05, "loss": 0.8406, "step": 196820 }, { "epoch": 3.4556435330676454, "grad_norm": 0.047177615564312186, "learning_rate": 4.669314690376603e-05, "loss": 0.8348, "step": 196830 }, { "epoch": 3.4558190979476464, "grad_norm": 0.059127313627491164, "learning_rate": 4.6683829888503024e-05, "loss": 0.8273, "step": 196840 }, { "epoch": 3.455994662827648, "grad_norm": 0.061776683105313944, "learning_rate": 4.667451356143632e-05, "loss": 0.8365, "step": 196850 }, { "epoch": 3.4561702277076494, "grad_norm": 0.058858403390666, "learning_rate": 4.666519792268388e-05, "loss": 0.8407, "step": 196860 }, { "epoch": 3.456345792587651, "grad_norm": 0.06190619969684676, "learning_rate": 4.6655882972363865e-05, "loss": 0.8381, "step": 196870 }, { "epoch": 3.4565213574676523, "grad_norm": 0.06320485954781564, "learning_rate": 4.664656871059419e-05, "loss": 0.8452, "step": 196880 }, { "epoch": 3.4566969223476534, "grad_norm": 0.05647091944751015, "learning_rate": 4.663725513749292e-05, "loss": 0.841, "step": 196890 }, { "epoch": 3.456872487227655, "grad_norm": 0.04646022211982402, "learning_rate": 4.662794225317806e-05, "loss": 0.8397, "step": 196900 }, { "epoch": 3.4570480521076563, "grad_norm": 0.05495049667699605, "learning_rate": 4.661863005776757e-05, "loss": 0.8373, "step": 196910 }, { "epoch": 3.457223616987658, "grad_norm": 0.05465411774507581, "learning_rate": 4.660931855137947e-05, "loss": 0.8439, "step": 196920 }, { "epoch": 3.4573991818676593, "grad_norm": 0.05523646084651684, "learning_rate": 4.660000773413171e-05, "loss": 0.8431, "step": 196930 }, { "epoch": 3.4575747467476607, "grad_norm": 0.06143275946400446, "learning_rate": 4.659069760614231e-05, "loss": 0.835, "step": 196940 }, { "epoch": 3.457750311627662, "grad_norm": 0.057708781739356375, "learning_rate": 4.6581388167529214e-05, "loss": 0.8358, "step": 196950 }, { "epoch": 3.4579258765076633, "grad_norm": 0.04949481051803071, "learning_rate": 4.657207941841028e-05, "loss": 0.8398, "step": 196960 }, { "epoch": 3.4581014413876647, "grad_norm": 0.057996845381131236, "learning_rate": 4.65627713589036e-05, "loss": 0.849, "step": 196970 }, { "epoch": 3.458277006267666, "grad_norm": 0.05227205646255294, "learning_rate": 4.655346398912701e-05, "loss": 0.8436, "step": 196980 }, { "epoch": 3.4584525711476677, "grad_norm": 0.05688085203135641, "learning_rate": 4.6544157309198464e-05, "loss": 0.8384, "step": 196990 }, { "epoch": 3.458628136027669, "grad_norm": 0.05409196746728976, "learning_rate": 4.6534851319235916e-05, "loss": 0.8448, "step": 197000 }, { "epoch": 3.45880370090767, "grad_norm": 0.05426963030771665, "learning_rate": 4.652554601935721e-05, "loss": 0.8393, "step": 197010 }, { "epoch": 3.4589792657876717, "grad_norm": 0.06610642054779073, "learning_rate": 4.65162414096803e-05, "loss": 0.8404, "step": 197020 }, { "epoch": 3.459154830667673, "grad_norm": 0.05744545196362876, "learning_rate": 4.650693749032298e-05, "loss": 0.8411, "step": 197030 }, { "epoch": 3.4593303955476746, "grad_norm": 0.0910861156489301, "learning_rate": 4.649763426140329e-05, "loss": 0.8418, "step": 197040 }, { "epoch": 3.459505960427676, "grad_norm": 0.04803828812356007, "learning_rate": 4.648833172303898e-05, "loss": 0.839, "step": 197050 }, { "epoch": 3.4596815253076776, "grad_norm": 0.05845946909320537, "learning_rate": 4.647902987534795e-05, "loss": 0.8399, "step": 197060 }, { "epoch": 3.459857090187679, "grad_norm": 0.04847139983800448, "learning_rate": 4.64697287184481e-05, "loss": 0.8442, "step": 197070 }, { "epoch": 3.46003265506768, "grad_norm": 0.05048975962148135, "learning_rate": 4.6460428252457204e-05, "loss": 0.8485, "step": 197080 }, { "epoch": 3.4602082199476816, "grad_norm": 0.06022866034701563, "learning_rate": 4.645112847749313e-05, "loss": 0.8393, "step": 197090 }, { "epoch": 3.460383784827683, "grad_norm": 0.06544654784512123, "learning_rate": 4.64418293936737e-05, "loss": 0.8354, "step": 197100 }, { "epoch": 3.4605593497076845, "grad_norm": 0.054157266231730034, "learning_rate": 4.64325310011168e-05, "loss": 0.8443, "step": 197110 }, { "epoch": 3.460734914587686, "grad_norm": 0.056394563680862125, "learning_rate": 4.64232332999402e-05, "loss": 0.8503, "step": 197120 }, { "epoch": 3.460910479467687, "grad_norm": 0.04808135158714719, "learning_rate": 4.64139362902616e-05, "loss": 0.843, "step": 197130 }, { "epoch": 3.4610860443476885, "grad_norm": 0.057282811261637014, "learning_rate": 4.640463997219897e-05, "loss": 0.8397, "step": 197140 }, { "epoch": 3.46126160922769, "grad_norm": 0.05008221084898831, "learning_rate": 4.639534434586998e-05, "loss": 0.8423, "step": 197150 }, { "epoch": 3.4614371741076915, "grad_norm": 0.05739576897050903, "learning_rate": 4.638604941139244e-05, "loss": 0.836, "step": 197160 }, { "epoch": 3.461612738987693, "grad_norm": 0.0620256739247974, "learning_rate": 4.6376755168884156e-05, "loss": 0.8414, "step": 197170 }, { "epoch": 3.4617883038676944, "grad_norm": 0.05766333694570417, "learning_rate": 4.636746161846282e-05, "loss": 0.8429, "step": 197180 }, { "epoch": 3.461963868747696, "grad_norm": 0.05099203897967336, "learning_rate": 4.635816876024625e-05, "loss": 0.8365, "step": 197190 }, { "epoch": 3.462139433627697, "grad_norm": 0.07241348176844167, "learning_rate": 4.6348876594352076e-05, "loss": 0.8492, "step": 197200 }, { "epoch": 3.4623149985076984, "grad_norm": 0.052367901004620326, "learning_rate": 4.633958512089818e-05, "loss": 0.8369, "step": 197210 }, { "epoch": 3.4624905633877, "grad_norm": 0.07042694598976819, "learning_rate": 4.6330294340002186e-05, "loss": 0.8432, "step": 197220 }, { "epoch": 3.4626661282677014, "grad_norm": 0.04644117401236317, "learning_rate": 4.632100425178184e-05, "loss": 0.8446, "step": 197230 }, { "epoch": 3.462841693147703, "grad_norm": 0.053009959765800946, "learning_rate": 4.6311714856354895e-05, "loss": 0.8351, "step": 197240 }, { "epoch": 3.4630172580277043, "grad_norm": 0.046830333267445456, "learning_rate": 4.6302426153838954e-05, "loss": 0.8366, "step": 197250 }, { "epoch": 3.4631928229077054, "grad_norm": 0.05978010935199636, "learning_rate": 4.629313814435176e-05, "loss": 0.8442, "step": 197260 }, { "epoch": 3.463368387787707, "grad_norm": 0.044582845246675946, "learning_rate": 4.6283850828010995e-05, "loss": 0.8449, "step": 197270 }, { "epoch": 3.4635439526677083, "grad_norm": 0.05589940097380329, "learning_rate": 4.6274564204934364e-05, "loss": 0.8296, "step": 197280 }, { "epoch": 3.46371951754771, "grad_norm": 0.044109919551700606, "learning_rate": 4.626527827523948e-05, "loss": 0.8359, "step": 197290 }, { "epoch": 3.4638950824277113, "grad_norm": 0.06318523543519008, "learning_rate": 4.625599303904394e-05, "loss": 0.8421, "step": 197300 }, { "epoch": 3.4640706473077127, "grad_norm": 0.04542707681151442, "learning_rate": 4.6246708496465534e-05, "loss": 0.8335, "step": 197310 }, { "epoch": 3.464246212187714, "grad_norm": 0.04768139351693255, "learning_rate": 4.62374246476218e-05, "loss": 0.844, "step": 197320 }, { "epoch": 3.4644217770677153, "grad_norm": 0.04868842882899563, "learning_rate": 4.6228141492630375e-05, "loss": 0.8378, "step": 197330 }, { "epoch": 3.4645973419477167, "grad_norm": 0.06622370637829345, "learning_rate": 4.621885903160894e-05, "loss": 0.849, "step": 197340 }, { "epoch": 3.464772906827718, "grad_norm": 0.06712889493531762, "learning_rate": 4.620957726467503e-05, "loss": 0.8425, "step": 197350 }, { "epoch": 3.4649484717077197, "grad_norm": 0.053046807417861906, "learning_rate": 4.620029619194629e-05, "loss": 0.8395, "step": 197360 }, { "epoch": 3.465124036587721, "grad_norm": 0.051734691196682926, "learning_rate": 4.619101581354025e-05, "loss": 0.8396, "step": 197370 }, { "epoch": 3.4652996014677226, "grad_norm": 0.053291255262996454, "learning_rate": 4.6181736129574605e-05, "loss": 0.8392, "step": 197380 }, { "epoch": 3.4654751663477237, "grad_norm": 0.05661649391250457, "learning_rate": 4.6172457140166844e-05, "loss": 0.848, "step": 197390 }, { "epoch": 3.465650731227725, "grad_norm": 0.04249474258497737, "learning_rate": 4.616317884543455e-05, "loss": 0.841, "step": 197400 }, { "epoch": 3.4658262961077266, "grad_norm": 0.0637778020668044, "learning_rate": 4.615390124549532e-05, "loss": 0.8375, "step": 197410 }, { "epoch": 3.466001860987728, "grad_norm": 0.06317116125085986, "learning_rate": 4.614462434046664e-05, "loss": 0.8446, "step": 197420 }, { "epoch": 3.4661774258677296, "grad_norm": 0.05465412249694534, "learning_rate": 4.613534813046608e-05, "loss": 0.8427, "step": 197430 }, { "epoch": 3.4663529907477306, "grad_norm": 0.04843960621338829, "learning_rate": 4.612607261561119e-05, "loss": 0.8444, "step": 197440 }, { "epoch": 3.466528555627732, "grad_norm": 0.06674162651487747, "learning_rate": 4.61167977960195e-05, "loss": 0.8471, "step": 197450 }, { "epoch": 3.4667041205077336, "grad_norm": 0.09256806139678862, "learning_rate": 4.61075236718085e-05, "loss": 0.8483, "step": 197460 }, { "epoch": 3.466879685387735, "grad_norm": 0.046231863196923975, "learning_rate": 4.609825024309562e-05, "loss": 0.8391, "step": 197470 }, { "epoch": 3.4670552502677365, "grad_norm": 0.047312815844425114, "learning_rate": 4.608897750999851e-05, "loss": 0.8396, "step": 197480 }, { "epoch": 3.467230815147738, "grad_norm": 0.056707586244023715, "learning_rate": 4.607970547263454e-05, "loss": 0.843, "step": 197490 }, { "epoch": 3.4674063800277395, "grad_norm": 0.0647628827746781, "learning_rate": 4.6070434131121234e-05, "loss": 0.8403, "step": 197500 }, { "epoch": 3.4675819449077405, "grad_norm": 0.06791857665917216, "learning_rate": 4.6061163485576075e-05, "loss": 0.848, "step": 197510 }, { "epoch": 3.467757509787742, "grad_norm": 0.054908550853236766, "learning_rate": 4.605189353611647e-05, "loss": 0.8436, "step": 197520 }, { "epoch": 3.4679330746677435, "grad_norm": 0.05132670737644294, "learning_rate": 4.6042624282859955e-05, "loss": 0.8364, "step": 197530 }, { "epoch": 3.468108639547745, "grad_norm": 0.05193530088008754, "learning_rate": 4.603335572592385e-05, "loss": 0.8397, "step": 197540 }, { "epoch": 3.4682842044277464, "grad_norm": 0.06643689815643512, "learning_rate": 4.602408786542572e-05, "loss": 0.8363, "step": 197550 }, { "epoch": 3.4684597693077475, "grad_norm": 0.05597872126266016, "learning_rate": 4.6014820701482905e-05, "loss": 0.8405, "step": 197560 }, { "epoch": 3.468635334187749, "grad_norm": 0.0541522457185559, "learning_rate": 4.600555423421285e-05, "loss": 0.8449, "step": 197570 }, { "epoch": 3.4688108990677504, "grad_norm": 0.05694057206831768, "learning_rate": 4.5996288463732994e-05, "loss": 0.8391, "step": 197580 }, { "epoch": 3.468986463947752, "grad_norm": 0.05093710110360561, "learning_rate": 4.598702339016067e-05, "loss": 0.8459, "step": 197590 }, { "epoch": 3.4691620288277534, "grad_norm": 0.05560472908704228, "learning_rate": 4.597775901361331e-05, "loss": 0.8406, "step": 197600 }, { "epoch": 3.469337593707755, "grad_norm": 0.053192750548827314, "learning_rate": 4.5968495334208286e-05, "loss": 0.8417, "step": 197610 }, { "epoch": 3.4695131585877563, "grad_norm": 0.05339200188265041, "learning_rate": 4.5959232352063e-05, "loss": 0.8428, "step": 197620 }, { "epoch": 3.4696887234677574, "grad_norm": 0.05676998137177405, "learning_rate": 4.5949970067294806e-05, "loss": 0.8367, "step": 197630 }, { "epoch": 3.469864288347759, "grad_norm": 0.05167349018869613, "learning_rate": 4.594070848002095e-05, "loss": 0.8506, "step": 197640 }, { "epoch": 3.4700398532277603, "grad_norm": 0.05823105148628003, "learning_rate": 4.593144759035895e-05, "loss": 0.8404, "step": 197650 }, { "epoch": 3.470215418107762, "grad_norm": 0.05010298643716784, "learning_rate": 4.592218739842603e-05, "loss": 0.8483, "step": 197660 }, { "epoch": 3.4703909829877633, "grad_norm": 0.05595713861745298, "learning_rate": 4.591292790433955e-05, "loss": 0.8403, "step": 197670 }, { "epoch": 3.4705665478677643, "grad_norm": 0.04524354322008805, "learning_rate": 4.5903669108216856e-05, "loss": 0.8431, "step": 197680 }, { "epoch": 3.470742112747766, "grad_norm": 0.06939394165191178, "learning_rate": 4.5894411010175206e-05, "loss": 0.8436, "step": 197690 }, { "epoch": 3.4709176776277673, "grad_norm": 0.05749852439403477, "learning_rate": 4.588515361033198e-05, "loss": 0.842, "step": 197700 }, { "epoch": 3.4710932425077687, "grad_norm": 0.04969936273986872, "learning_rate": 4.5875896908804324e-05, "loss": 0.844, "step": 197710 }, { "epoch": 3.47126880738777, "grad_norm": 0.06376297657652605, "learning_rate": 4.58666409057097e-05, "loss": 0.8373, "step": 197720 }, { "epoch": 3.4714443722677717, "grad_norm": 0.06360335197647755, "learning_rate": 4.5857385601165274e-05, "loss": 0.8434, "step": 197730 }, { "epoch": 3.471619937147773, "grad_norm": 0.05169260070862869, "learning_rate": 4.584813099528833e-05, "loss": 0.8346, "step": 197740 }, { "epoch": 3.471795502027774, "grad_norm": 0.04624630326809619, "learning_rate": 4.583887708819617e-05, "loss": 0.8375, "step": 197750 }, { "epoch": 3.4719710669077757, "grad_norm": 0.07501540512073937, "learning_rate": 4.5829623880005984e-05, "loss": 0.8406, "step": 197760 }, { "epoch": 3.472146631787777, "grad_norm": 0.05284102030160163, "learning_rate": 4.582037137083503e-05, "loss": 0.8352, "step": 197770 }, { "epoch": 3.4723221966677786, "grad_norm": 0.06371363796889074, "learning_rate": 4.5811119560800556e-05, "loss": 0.835, "step": 197780 }, { "epoch": 3.47249776154778, "grad_norm": 0.056658496837730964, "learning_rate": 4.5801868450019794e-05, "loss": 0.8445, "step": 197790 }, { "epoch": 3.472673326427781, "grad_norm": 0.04712676571981139, "learning_rate": 4.579261803860994e-05, "loss": 0.8421, "step": 197800 }, { "epoch": 3.4728488913077826, "grad_norm": 0.05052938804595096, "learning_rate": 4.578336832668813e-05, "loss": 0.8416, "step": 197810 }, { "epoch": 3.473024456187784, "grad_norm": 0.053770858038039875, "learning_rate": 4.57741193143717e-05, "loss": 0.8449, "step": 197820 }, { "epoch": 3.4732000210677856, "grad_norm": 0.03850132394839228, "learning_rate": 4.5764871001777726e-05, "loss": 0.8463, "step": 197830 }, { "epoch": 3.473375585947787, "grad_norm": 0.04419053080813262, "learning_rate": 4.575562338902341e-05, "loss": 0.8367, "step": 197840 }, { "epoch": 3.4735511508277885, "grad_norm": 0.05496048699616588, "learning_rate": 4.5746376476225986e-05, "loss": 0.8468, "step": 197850 }, { "epoch": 3.47372671570779, "grad_norm": 0.05358923549353526, "learning_rate": 4.573713026350251e-05, "loss": 0.8443, "step": 197860 }, { "epoch": 3.473902280587791, "grad_norm": 0.05429402525047923, "learning_rate": 4.572788475097023e-05, "loss": 0.8512, "step": 197870 }, { "epoch": 3.4740778454677925, "grad_norm": 0.05495959959688894, "learning_rate": 4.5718639938746164e-05, "loss": 0.8437, "step": 197880 }, { "epoch": 3.474253410347794, "grad_norm": 0.06537879045141148, "learning_rate": 4.57093958269476e-05, "loss": 0.8333, "step": 197890 }, { "epoch": 3.4744289752277955, "grad_norm": 0.04046299859766793, "learning_rate": 4.570015241569156e-05, "loss": 0.8441, "step": 197900 }, { "epoch": 3.474604540107797, "grad_norm": 0.05690323390153259, "learning_rate": 4.569090970509518e-05, "loss": 0.8502, "step": 197910 }, { "epoch": 3.4747801049877984, "grad_norm": 0.04646478278749478, "learning_rate": 4.568166769527561e-05, "loss": 0.8366, "step": 197920 }, { "epoch": 3.4749556698677995, "grad_norm": 0.05555858089471255, "learning_rate": 4.567242638634987e-05, "loss": 0.8327, "step": 197930 }, { "epoch": 3.475131234747801, "grad_norm": 0.06369782256578778, "learning_rate": 4.56631857784351e-05, "loss": 0.8441, "step": 197940 }, { "epoch": 3.4753067996278024, "grad_norm": 0.051428258453009565, "learning_rate": 4.5653945871648365e-05, "loss": 0.8471, "step": 197950 }, { "epoch": 3.475482364507804, "grad_norm": 0.05485406377154333, "learning_rate": 4.564470666610679e-05, "loss": 0.8419, "step": 197960 }, { "epoch": 3.4756579293878054, "grad_norm": 0.06378794240883492, "learning_rate": 4.5635468161927376e-05, "loss": 0.8466, "step": 197970 }, { "epoch": 3.475833494267807, "grad_norm": 0.05228832440621661, "learning_rate": 4.562623035922713e-05, "loss": 0.8298, "step": 197980 }, { "epoch": 3.476009059147808, "grad_norm": 0.0510199777586386, "learning_rate": 4.561699325812322e-05, "loss": 0.8384, "step": 197990 }, { "epoch": 3.4761846240278094, "grad_norm": 0.05665999697794374, "learning_rate": 4.560775685873259e-05, "loss": 0.8341, "step": 198000 }, { "epoch": 3.476360188907811, "grad_norm": 0.046489134429219, "learning_rate": 4.559852116117229e-05, "loss": 0.8437, "step": 198010 }, { "epoch": 3.4765357537878123, "grad_norm": 0.06789215803428392, "learning_rate": 4.558928616555939e-05, "loss": 0.8429, "step": 198020 }, { "epoch": 3.476711318667814, "grad_norm": 0.057693128339397395, "learning_rate": 4.558005187201081e-05, "loss": 0.841, "step": 198030 }, { "epoch": 3.4768868835478153, "grad_norm": 0.05142879750724833, "learning_rate": 4.557081828064364e-05, "loss": 0.8457, "step": 198040 }, { "epoch": 3.4770624484278163, "grad_norm": 0.06292747953786532, "learning_rate": 4.5561585391574746e-05, "loss": 0.8415, "step": 198050 }, { "epoch": 3.477238013307818, "grad_norm": 0.052603032588425004, "learning_rate": 4.5552353204921264e-05, "loss": 0.8338, "step": 198060 }, { "epoch": 3.4774135781878193, "grad_norm": 0.048083354284683154, "learning_rate": 4.554312172080005e-05, "loss": 0.8353, "step": 198070 }, { "epoch": 3.4775891430678207, "grad_norm": 0.05031273381204942, "learning_rate": 4.5533890939328124e-05, "loss": 0.8453, "step": 198080 }, { "epoch": 3.477764707947822, "grad_norm": 0.0988647347466929, "learning_rate": 4.552466086062246e-05, "loss": 0.8429, "step": 198090 }, { "epoch": 3.4779402728278237, "grad_norm": 0.039786681849430384, "learning_rate": 4.551543148479993e-05, "loss": 0.832, "step": 198100 }, { "epoch": 3.4781158377078247, "grad_norm": 0.04761208636107407, "learning_rate": 4.5506202811977534e-05, "loss": 0.8403, "step": 198110 }, { "epoch": 3.478291402587826, "grad_norm": 0.07020701011700105, "learning_rate": 4.5496974842272165e-05, "loss": 0.8362, "step": 198120 }, { "epoch": 3.4784669674678277, "grad_norm": 0.04463819613591754, "learning_rate": 4.548774757580082e-05, "loss": 0.8425, "step": 198130 }, { "epoch": 3.478642532347829, "grad_norm": 0.044956640625443155, "learning_rate": 4.5478521012680326e-05, "loss": 0.8388, "step": 198140 }, { "epoch": 3.4788180972278306, "grad_norm": 0.06015014279583066, "learning_rate": 4.5469295153027556e-05, "loss": 0.8412, "step": 198150 }, { "epoch": 3.478993662107832, "grad_norm": 0.05818242181410549, "learning_rate": 4.5460069996959524e-05, "loss": 0.8416, "step": 198160 }, { "epoch": 3.4791692269878336, "grad_norm": 0.05582248459205252, "learning_rate": 4.545084554459303e-05, "loss": 0.8469, "step": 198170 }, { "epoch": 3.4793447918678346, "grad_norm": 0.05496675289721324, "learning_rate": 4.5441621796044945e-05, "loss": 0.8404, "step": 198180 }, { "epoch": 3.479520356747836, "grad_norm": 0.052960170860099874, "learning_rate": 4.543239875143221e-05, "loss": 0.8427, "step": 198190 }, { "epoch": 3.4796959216278376, "grad_norm": 0.058810214184192106, "learning_rate": 4.5423176410871597e-05, "loss": 0.8406, "step": 198200 }, { "epoch": 3.479871486507839, "grad_norm": 0.05728741882389626, "learning_rate": 4.5413954774480025e-05, "loss": 0.8351, "step": 198210 }, { "epoch": 3.4800470513878405, "grad_norm": 0.0525635419247595, "learning_rate": 4.540473384237422e-05, "loss": 0.8418, "step": 198220 }, { "epoch": 3.4802226162678416, "grad_norm": 0.05760878410048432, "learning_rate": 4.539551361467117e-05, "loss": 0.8406, "step": 198230 }, { "epoch": 3.480398181147843, "grad_norm": 0.048023413143515555, "learning_rate": 4.538629409148758e-05, "loss": 0.8398, "step": 198240 }, { "epoch": 3.4805737460278445, "grad_norm": 0.05753044882354377, "learning_rate": 4.53770752729403e-05, "loss": 0.8355, "step": 198250 }, { "epoch": 3.480749310907846, "grad_norm": 0.06601160977307298, "learning_rate": 4.536785715914618e-05, "loss": 0.842, "step": 198260 }, { "epoch": 3.4809248757878475, "grad_norm": 0.06009458879366598, "learning_rate": 4.535863975022193e-05, "loss": 0.8403, "step": 198270 }, { "epoch": 3.481100440667849, "grad_norm": 0.06387275176650598, "learning_rate": 4.534942304628438e-05, "loss": 0.8397, "step": 198280 }, { "epoch": 3.4812760055478504, "grad_norm": 0.061317741468715145, "learning_rate": 4.53402070474503e-05, "loss": 0.8421, "step": 198290 }, { "epoch": 3.4814515704278515, "grad_norm": 0.057040033160666775, "learning_rate": 4.5330991753836506e-05, "loss": 0.8407, "step": 198300 }, { "epoch": 3.481627135307853, "grad_norm": 0.05906722123275731, "learning_rate": 4.532177716555971e-05, "loss": 0.8375, "step": 198310 }, { "epoch": 3.4818027001878544, "grad_norm": 0.050616852858088736, "learning_rate": 4.5312563282736605e-05, "loss": 0.8397, "step": 198320 }, { "epoch": 3.481978265067856, "grad_norm": 0.07150808518568874, "learning_rate": 4.530335010548406e-05, "loss": 0.8381, "step": 198330 }, { "epoch": 3.4821538299478574, "grad_norm": 0.0661394930157319, "learning_rate": 4.529413763391871e-05, "loss": 0.843, "step": 198340 }, { "epoch": 3.4823293948278584, "grad_norm": 0.045797930865483526, "learning_rate": 4.5284925868157304e-05, "loss": 0.8405, "step": 198350 }, { "epoch": 3.48250495970786, "grad_norm": 0.04653645779477515, "learning_rate": 4.527571480831662e-05, "loss": 0.8381, "step": 198360 }, { "epoch": 3.4826805245878614, "grad_norm": 0.06062186794096757, "learning_rate": 4.526650445451326e-05, "loss": 0.8423, "step": 198370 }, { "epoch": 3.482856089467863, "grad_norm": 0.04716049140760879, "learning_rate": 4.525729480686401e-05, "loss": 0.8425, "step": 198380 }, { "epoch": 3.4830316543478643, "grad_norm": 0.05221453688691067, "learning_rate": 4.524808586548544e-05, "loss": 0.8397, "step": 198390 }, { "epoch": 3.483207219227866, "grad_norm": 0.05200711498337973, "learning_rate": 4.5238877630494384e-05, "loss": 0.8415, "step": 198400 }, { "epoch": 3.4833827841078673, "grad_norm": 0.0630319936231948, "learning_rate": 4.52296701020074e-05, "loss": 0.84, "step": 198410 }, { "epoch": 3.4835583489878683, "grad_norm": 0.04606213611839157, "learning_rate": 4.5220463280141176e-05, "loss": 0.8473, "step": 198420 }, { "epoch": 3.48373391386787, "grad_norm": 0.0499733138955965, "learning_rate": 4.5211257165012414e-05, "loss": 0.8384, "step": 198430 }, { "epoch": 3.4839094787478713, "grad_norm": 0.05712738979101536, "learning_rate": 4.520205175673768e-05, "loss": 0.8369, "step": 198440 }, { "epoch": 3.4840850436278727, "grad_norm": 0.039817736035259725, "learning_rate": 4.5192847055433645e-05, "loss": 0.8368, "step": 198450 }, { "epoch": 3.4842606085078742, "grad_norm": 0.044885185175087154, "learning_rate": 4.518364306121693e-05, "loss": 0.8317, "step": 198460 }, { "epoch": 3.4844361733878753, "grad_norm": 0.05395307438713094, "learning_rate": 4.517443977420419e-05, "loss": 0.8393, "step": 198470 }, { "epoch": 3.4846117382678767, "grad_norm": 0.048931637996607644, "learning_rate": 4.5165237194512e-05, "loss": 0.8381, "step": 198480 }, { "epoch": 3.484787303147878, "grad_norm": 0.0668690523735504, "learning_rate": 4.515603532225689e-05, "loss": 0.8373, "step": 198490 }, { "epoch": 3.4849628680278797, "grad_norm": 0.05012058684816146, "learning_rate": 4.5146834157555574e-05, "loss": 0.8431, "step": 198500 }, { "epoch": 3.485138432907881, "grad_norm": 0.06762029265482566, "learning_rate": 4.513763370052455e-05, "loss": 0.838, "step": 198510 }, { "epoch": 3.4853139977878826, "grad_norm": 0.11769561642153123, "learning_rate": 4.5128433951280406e-05, "loss": 0.8353, "step": 198520 }, { "epoch": 3.485489562667884, "grad_norm": 0.04938548310529093, "learning_rate": 4.5119234909939746e-05, "loss": 0.838, "step": 198530 }, { "epoch": 3.485665127547885, "grad_norm": 0.05677441683640447, "learning_rate": 4.511003657661905e-05, "loss": 0.8504, "step": 198540 }, { "epoch": 3.4858406924278866, "grad_norm": 0.056615632182696315, "learning_rate": 4.5100838951434946e-05, "loss": 0.849, "step": 198550 }, { "epoch": 3.486016257307888, "grad_norm": 0.06928029400187821, "learning_rate": 4.509164203450384e-05, "loss": 0.8466, "step": 198560 }, { "epoch": 3.4861918221878896, "grad_norm": 0.042975568830826615, "learning_rate": 4.508244582594244e-05, "loss": 0.8353, "step": 198570 }, { "epoch": 3.486367387067891, "grad_norm": 0.05231891673525908, "learning_rate": 4.507325032586711e-05, "loss": 0.8476, "step": 198580 }, { "epoch": 3.486542951947892, "grad_norm": 0.05045689137549638, "learning_rate": 4.5064055534394426e-05, "loss": 0.8463, "step": 198590 }, { "epoch": 3.4867185168278936, "grad_norm": 0.06254107631820234, "learning_rate": 4.5054861451640916e-05, "loss": 0.841, "step": 198600 }, { "epoch": 3.486894081707895, "grad_norm": 0.043401486353036506, "learning_rate": 4.5045668077723e-05, "loss": 0.8444, "step": 198610 }, { "epoch": 3.4870696465878965, "grad_norm": 0.06363841627090265, "learning_rate": 4.5036475412757176e-05, "loss": 0.8357, "step": 198620 }, { "epoch": 3.487245211467898, "grad_norm": 0.047828848636035426, "learning_rate": 4.5027283456859955e-05, "loss": 0.8309, "step": 198630 }, { "epoch": 3.4874207763478995, "grad_norm": 0.04844071419866838, "learning_rate": 4.50180922101478e-05, "loss": 0.8455, "step": 198640 }, { "epoch": 3.487596341227901, "grad_norm": 0.05245212812302599, "learning_rate": 4.500890167273716e-05, "loss": 0.8381, "step": 198650 }, { "epoch": 3.487771906107902, "grad_norm": 0.0486482181539715, "learning_rate": 4.4999711844744385e-05, "loss": 0.8386, "step": 198660 }, { "epoch": 3.4879474709879035, "grad_norm": 0.042024935537885215, "learning_rate": 4.499052272628606e-05, "loss": 0.8417, "step": 198670 }, { "epoch": 3.488123035867905, "grad_norm": 0.05353142946127681, "learning_rate": 4.4981334317478515e-05, "loss": 0.8398, "step": 198680 }, { "epoch": 3.4882986007479064, "grad_norm": 0.05555968817836667, "learning_rate": 4.4972146618438204e-05, "loss": 0.8304, "step": 198690 }, { "epoch": 3.488474165627908, "grad_norm": 0.05433376761198443, "learning_rate": 4.496295962928157e-05, "loss": 0.8356, "step": 198700 }, { "epoch": 3.4886497305079094, "grad_norm": 0.052924656436065906, "learning_rate": 4.4953773350124934e-05, "loss": 0.8338, "step": 198710 }, { "epoch": 3.4888252953879104, "grad_norm": 0.05129501570556686, "learning_rate": 4.494458778108477e-05, "loss": 0.8433, "step": 198720 }, { "epoch": 3.489000860267912, "grad_norm": 0.047449262875638075, "learning_rate": 4.493540292227735e-05, "loss": 0.841, "step": 198730 }, { "epoch": 3.4891764251479134, "grad_norm": 0.053705376606623306, "learning_rate": 4.49262187738192e-05, "loss": 0.8378, "step": 198740 }, { "epoch": 3.489351990027915, "grad_norm": 0.06322913009185321, "learning_rate": 4.491703533582656e-05, "loss": 0.8373, "step": 198750 }, { "epoch": 3.4895275549079163, "grad_norm": 0.0511181817204554, "learning_rate": 4.490785260841583e-05, "loss": 0.8376, "step": 198760 }, { "epoch": 3.489703119787918, "grad_norm": 0.0573971115410176, "learning_rate": 4.48986705917034e-05, "loss": 0.8363, "step": 198770 }, { "epoch": 3.489878684667919, "grad_norm": 0.059441618900109836, "learning_rate": 4.4889489285805526e-05, "loss": 0.8426, "step": 198780 }, { "epoch": 3.4900542495479203, "grad_norm": 0.07427056436079586, "learning_rate": 4.488030869083857e-05, "loss": 0.8508, "step": 198790 }, { "epoch": 3.490229814427922, "grad_norm": 0.05366486366148835, "learning_rate": 4.4871128806918864e-05, "loss": 0.8325, "step": 198800 }, { "epoch": 3.4904053793079233, "grad_norm": 0.04663137483177132, "learning_rate": 4.4861949634162745e-05, "loss": 0.8286, "step": 198810 }, { "epoch": 3.4905809441879248, "grad_norm": 0.05068866285044372, "learning_rate": 4.485277117268648e-05, "loss": 0.8404, "step": 198820 }, { "epoch": 3.4907565090679262, "grad_norm": 0.05837770933162542, "learning_rate": 4.4843593422606306e-05, "loss": 0.8396, "step": 198830 }, { "epoch": 3.4909320739479277, "grad_norm": 0.06919242000008091, "learning_rate": 4.483441638403863e-05, "loss": 0.8424, "step": 198840 }, { "epoch": 3.4911076388279287, "grad_norm": 0.064968262878111, "learning_rate": 4.482524005709963e-05, "loss": 0.8429, "step": 198850 }, { "epoch": 3.49128320370793, "grad_norm": 0.06932946875053662, "learning_rate": 4.48160644419056e-05, "loss": 0.8325, "step": 198860 }, { "epoch": 3.4914587685879317, "grad_norm": 0.05005724165055143, "learning_rate": 4.480688953857283e-05, "loss": 0.8466, "step": 198870 }, { "epoch": 3.491634333467933, "grad_norm": 0.05324313451893133, "learning_rate": 4.479771534721752e-05, "loss": 0.8393, "step": 198880 }, { "epoch": 3.4918098983479346, "grad_norm": 0.05233403102634368, "learning_rate": 4.4788541867955954e-05, "loss": 0.832, "step": 198890 }, { "epoch": 3.4919854632279357, "grad_norm": 0.05426423450731101, "learning_rate": 4.477936910090426e-05, "loss": 0.8401, "step": 198900 }, { "epoch": 3.492161028107937, "grad_norm": 0.04962497089454261, "learning_rate": 4.477019704617881e-05, "loss": 0.8387, "step": 198910 }, { "epoch": 3.4923365929879386, "grad_norm": 0.045494505507683, "learning_rate": 4.476102570389571e-05, "loss": 0.8389, "step": 198920 }, { "epoch": 3.49251215786794, "grad_norm": 0.04482383696518938, "learning_rate": 4.475185507417118e-05, "loss": 0.8407, "step": 198930 }, { "epoch": 3.4926877227479416, "grad_norm": 0.04224066422025547, "learning_rate": 4.474268515712147e-05, "loss": 0.8569, "step": 198940 }, { "epoch": 3.492863287627943, "grad_norm": 0.05278145255172799, "learning_rate": 4.473351595286268e-05, "loss": 0.8275, "step": 198950 }, { "epoch": 3.4930388525079445, "grad_norm": 0.0380850092571548, "learning_rate": 4.4724347461511034e-05, "loss": 0.8441, "step": 198960 }, { "epoch": 3.4932144173879456, "grad_norm": 0.040320370135536646, "learning_rate": 4.471517968318269e-05, "loss": 0.8412, "step": 198970 }, { "epoch": 3.493389982267947, "grad_norm": 0.04551084404808535, "learning_rate": 4.470601261799383e-05, "loss": 0.8513, "step": 198980 }, { "epoch": 3.4935655471479485, "grad_norm": 0.06932628054608699, "learning_rate": 4.46968462660606e-05, "loss": 0.836, "step": 198990 }, { "epoch": 3.49374111202795, "grad_norm": 0.05629127797198223, "learning_rate": 4.468768062749903e-05, "loss": 0.8505, "step": 199000 }, { "epoch": 3.4939166769079515, "grad_norm": 0.05570253121692377, "learning_rate": 4.4678515702425406e-05, "loss": 0.8492, "step": 199010 }, { "epoch": 3.4940922417879525, "grad_norm": 0.059823004756691854, "learning_rate": 4.466935149095576e-05, "loss": 0.8307, "step": 199020 }, { "epoch": 3.494267806667954, "grad_norm": 0.05281971655574328, "learning_rate": 4.466018799320622e-05, "loss": 0.8377, "step": 199030 }, { "epoch": 3.4944433715479555, "grad_norm": 0.05229852758822509, "learning_rate": 4.465102520929294e-05, "loss": 0.8443, "step": 199040 }, { "epoch": 3.494618936427957, "grad_norm": 0.0599117718489344, "learning_rate": 4.464186313933192e-05, "loss": 0.8399, "step": 199050 }, { "epoch": 3.4947945013079584, "grad_norm": 0.052522749018502686, "learning_rate": 4.463270178343934e-05, "loss": 0.8385, "step": 199060 }, { "epoch": 3.49497006618796, "grad_norm": 0.06588677348215416, "learning_rate": 4.462354114173115e-05, "loss": 0.8431, "step": 199070 }, { "epoch": 3.4951456310679614, "grad_norm": 0.05578952581452616, "learning_rate": 4.461438121432359e-05, "loss": 0.8424, "step": 199080 }, { "epoch": 3.4953211959479624, "grad_norm": 0.051576931404738804, "learning_rate": 4.460522200133257e-05, "loss": 0.8342, "step": 199090 }, { "epoch": 3.495496760827964, "grad_norm": 0.08281251040730915, "learning_rate": 4.459606350287421e-05, "loss": 0.841, "step": 199100 }, { "epoch": 3.4956723257079654, "grad_norm": 0.052716558351412784, "learning_rate": 4.458690571906455e-05, "loss": 0.8434, "step": 199110 }, { "epoch": 3.495847890587967, "grad_norm": 0.04795098140784664, "learning_rate": 4.4577748650019585e-05, "loss": 0.8362, "step": 199120 }, { "epoch": 3.4960234554679683, "grad_norm": 0.055200436208294026, "learning_rate": 4.456859229585536e-05, "loss": 0.8478, "step": 199130 }, { "epoch": 3.4961990203479694, "grad_norm": 0.04888549722548365, "learning_rate": 4.4559436656687884e-05, "loss": 0.8356, "step": 199140 }, { "epoch": 3.496374585227971, "grad_norm": 0.04001841355385094, "learning_rate": 4.45502817326332e-05, "loss": 0.8417, "step": 199150 }, { "epoch": 3.4965501501079723, "grad_norm": 0.050247867683029475, "learning_rate": 4.4541127523807255e-05, "loss": 0.8486, "step": 199160 }, { "epoch": 3.496725714987974, "grad_norm": 0.05928902508423957, "learning_rate": 4.4531974030325984e-05, "loss": 0.84, "step": 199170 }, { "epoch": 3.4969012798679753, "grad_norm": 0.05961060969699715, "learning_rate": 4.452282125230549e-05, "loss": 0.8385, "step": 199180 }, { "epoch": 3.4970768447479768, "grad_norm": 0.046448012021312705, "learning_rate": 4.451366918986163e-05, "loss": 0.8417, "step": 199190 }, { "epoch": 3.4972524096279782, "grad_norm": 0.06011098325486774, "learning_rate": 4.4504517843110426e-05, "loss": 0.8421, "step": 199200 }, { "epoch": 3.4974279745079793, "grad_norm": 0.04977333114132336, "learning_rate": 4.449536721216784e-05, "loss": 0.8338, "step": 199210 }, { "epoch": 3.4976035393879807, "grad_norm": 0.07547598047547639, "learning_rate": 4.448621729714973e-05, "loss": 0.8454, "step": 199220 }, { "epoch": 3.497779104267982, "grad_norm": 0.06577355386345812, "learning_rate": 4.447706809817213e-05, "loss": 0.8375, "step": 199230 }, { "epoch": 3.4979546691479837, "grad_norm": 0.048491444161510154, "learning_rate": 4.446791961535083e-05, "loss": 0.8382, "step": 199240 }, { "epoch": 3.498130234027985, "grad_norm": 0.04559805669142092, "learning_rate": 4.445877184880189e-05, "loss": 0.8412, "step": 199250 }, { "epoch": 3.498305798907986, "grad_norm": 0.050416727148144075, "learning_rate": 4.4449624798641116e-05, "loss": 0.8399, "step": 199260 }, { "epoch": 3.4984813637879877, "grad_norm": 0.04808740143272317, "learning_rate": 4.444047846498443e-05, "loss": 0.842, "step": 199270 }, { "epoch": 3.498656928667989, "grad_norm": 0.04408757638972667, "learning_rate": 4.4431332847947773e-05, "loss": 0.8408, "step": 199280 }, { "epoch": 3.4988324935479906, "grad_norm": 0.05999375818582311, "learning_rate": 4.4422187947646916e-05, "loss": 0.8365, "step": 199290 }, { "epoch": 3.499008058427992, "grad_norm": 0.06343815447536395, "learning_rate": 4.4413043764197795e-05, "loss": 0.8383, "step": 199300 }, { "epoch": 3.4991836233079936, "grad_norm": 0.055205954131196354, "learning_rate": 4.440390029771624e-05, "loss": 0.8389, "step": 199310 }, { "epoch": 3.499359188187995, "grad_norm": 0.05557390942436213, "learning_rate": 4.439475754831816e-05, "loss": 0.8419, "step": 199320 }, { "epoch": 3.499534753067996, "grad_norm": 0.047573862050795654, "learning_rate": 4.4385615516119345e-05, "loss": 0.8474, "step": 199330 }, { "epoch": 3.4997103179479976, "grad_norm": 0.04846385738418312, "learning_rate": 4.437647420123556e-05, "loss": 0.844, "step": 199340 }, { "epoch": 3.499885882827999, "grad_norm": 0.051312462552984606, "learning_rate": 4.4367333603782766e-05, "loss": 0.836, "step": 199350 }, { "epoch": 3.5000614477080005, "grad_norm": 0.05003334793380585, "learning_rate": 4.435819372387669e-05, "loss": 0.8399, "step": 199360 }, { "epoch": 3.500237012588002, "grad_norm": 0.06104063300597305, "learning_rate": 4.4349054561633144e-05, "loss": 0.8362, "step": 199370 }, { "epoch": 3.500412577468003, "grad_norm": 0.05343257284699606, "learning_rate": 4.433991611716797e-05, "loss": 0.8402, "step": 199380 }, { "epoch": 3.500588142348005, "grad_norm": 0.05939546126669199, "learning_rate": 4.433077839059688e-05, "loss": 0.836, "step": 199390 }, { "epoch": 3.500763707228006, "grad_norm": 0.05717246790442558, "learning_rate": 4.432164138203572e-05, "loss": 0.8455, "step": 199400 }, { "epoch": 3.5009392721080075, "grad_norm": 0.049220676072557874, "learning_rate": 4.431250509160017e-05, "loss": 0.8398, "step": 199410 }, { "epoch": 3.501114836988009, "grad_norm": 0.05272792076784081, "learning_rate": 4.430336951940611e-05, "loss": 0.8471, "step": 199420 }, { "epoch": 3.5012904018680104, "grad_norm": 0.047039806365734856, "learning_rate": 4.4294234665569176e-05, "loss": 0.8323, "step": 199430 }, { "epoch": 3.501465966748012, "grad_norm": 0.05404512714821062, "learning_rate": 4.428510053020517e-05, "loss": 0.8371, "step": 199440 }, { "epoch": 3.501641531628013, "grad_norm": 0.051069020747838575, "learning_rate": 4.4275967113429845e-05, "loss": 0.8397, "step": 199450 }, { "epoch": 3.5018170965080144, "grad_norm": 0.06963944349306432, "learning_rate": 4.426683441535885e-05, "loss": 0.8444, "step": 199460 }, { "epoch": 3.501992661388016, "grad_norm": 0.05036422373051378, "learning_rate": 4.425770243610793e-05, "loss": 0.8394, "step": 199470 }, { "epoch": 3.5021682262680174, "grad_norm": 0.04727120206654478, "learning_rate": 4.4248571175792794e-05, "loss": 0.841, "step": 199480 }, { "epoch": 3.502343791148019, "grad_norm": 0.052080717459183246, "learning_rate": 4.423944063452919e-05, "loss": 0.8365, "step": 199490 }, { "epoch": 3.50251935602802, "grad_norm": 0.046852378172602405, "learning_rate": 4.423031081243273e-05, "loss": 0.8361, "step": 199500 }, { "epoch": 3.502694920908022, "grad_norm": 0.07338125713976561, "learning_rate": 4.422118170961905e-05, "loss": 0.8346, "step": 199510 }, { "epoch": 3.502870485788023, "grad_norm": 0.05838099555089345, "learning_rate": 4.421205332620395e-05, "loss": 0.8425, "step": 199520 }, { "epoch": 3.5030460506680243, "grad_norm": 0.054424063564864546, "learning_rate": 4.420292566230298e-05, "loss": 0.8305, "step": 199530 }, { "epoch": 3.503221615548026, "grad_norm": 0.04571187389809651, "learning_rate": 4.4193798718031826e-05, "loss": 0.8385, "step": 199540 }, { "epoch": 3.5033971804280273, "grad_norm": 0.053701317244260104, "learning_rate": 4.4184672493506177e-05, "loss": 0.8464, "step": 199550 }, { "epoch": 3.5035727453080288, "grad_norm": 0.04975336375676813, "learning_rate": 4.4175546988841574e-05, "loss": 0.8412, "step": 199560 }, { "epoch": 3.50374831018803, "grad_norm": 0.05349716609259876, "learning_rate": 4.4166422204153715e-05, "loss": 0.8411, "step": 199570 }, { "epoch": 3.5039238750680313, "grad_norm": 0.05764846408355311, "learning_rate": 4.41572981395581e-05, "loss": 0.843, "step": 199580 }, { "epoch": 3.5040994399480327, "grad_norm": 0.043083211069038715, "learning_rate": 4.414817479517049e-05, "loss": 0.8465, "step": 199590 }, { "epoch": 3.5042750048280342, "grad_norm": 0.06683720724620766, "learning_rate": 4.413905217110637e-05, "loss": 0.8412, "step": 199600 }, { "epoch": 3.5044505697080357, "grad_norm": 0.059198696087760246, "learning_rate": 4.4129930267481344e-05, "loss": 0.8439, "step": 199610 }, { "epoch": 3.5046261345880367, "grad_norm": 0.051453111657391574, "learning_rate": 4.4120809084411045e-05, "loss": 0.843, "step": 199620 }, { "epoch": 3.5048016994680387, "grad_norm": 0.0637875768502769, "learning_rate": 4.411168862201096e-05, "loss": 0.8449, "step": 199630 }, { "epoch": 3.5049772643480397, "grad_norm": 0.059553692862372484, "learning_rate": 4.410256888039669e-05, "loss": 0.8493, "step": 199640 }, { "epoch": 3.505152829228041, "grad_norm": 0.05969456867233194, "learning_rate": 4.409344985968376e-05, "loss": 0.8396, "step": 199650 }, { "epoch": 3.5053283941080426, "grad_norm": 0.06261529841503233, "learning_rate": 4.4084331559987765e-05, "loss": 0.8374, "step": 199660 }, { "epoch": 3.505503958988044, "grad_norm": 0.04985268679278403, "learning_rate": 4.407521398142421e-05, "loss": 0.8452, "step": 199670 }, { "epoch": 3.5056795238680456, "grad_norm": 0.06216403230619467, "learning_rate": 4.406609712410852e-05, "loss": 0.842, "step": 199680 }, { "epoch": 3.5058550887480466, "grad_norm": 0.05057590446131984, "learning_rate": 4.405698098815636e-05, "loss": 0.841, "step": 199690 }, { "epoch": 3.506030653628048, "grad_norm": 0.07504400190400216, "learning_rate": 4.4047865573683136e-05, "loss": 0.8432, "step": 199700 }, { "epoch": 3.5062062185080496, "grad_norm": 0.06737206326313772, "learning_rate": 4.403875088080437e-05, "loss": 0.8416, "step": 199710 }, { "epoch": 3.506381783388051, "grad_norm": 0.0579214300502351, "learning_rate": 4.402963690963559e-05, "loss": 0.8391, "step": 199720 }, { "epoch": 3.5065573482680525, "grad_norm": 0.05544136228138098, "learning_rate": 4.402052366029218e-05, "loss": 0.8434, "step": 199730 }, { "epoch": 3.506732913148054, "grad_norm": 0.05603347221882146, "learning_rate": 4.401141113288969e-05, "loss": 0.8389, "step": 199740 }, { "epoch": 3.5069084780280555, "grad_norm": 0.05223532816489186, "learning_rate": 4.400229932754348e-05, "loss": 0.8404, "step": 199750 }, { "epoch": 3.5070840429080565, "grad_norm": 0.0511681736375196, "learning_rate": 4.3993188244369125e-05, "loss": 0.835, "step": 199760 }, { "epoch": 3.507259607788058, "grad_norm": 0.08044437566910967, "learning_rate": 4.398407788348197e-05, "loss": 0.8434, "step": 199770 }, { "epoch": 3.5074351726680595, "grad_norm": 0.05100619581084862, "learning_rate": 4.3974968244997474e-05, "loss": 0.8391, "step": 199780 }, { "epoch": 3.507610737548061, "grad_norm": 0.04286863585419777, "learning_rate": 4.3965859329031087e-05, "loss": 0.834, "step": 199790 }, { "epoch": 3.5077863024280624, "grad_norm": 0.05573230248057698, "learning_rate": 4.395675113569818e-05, "loss": 0.8417, "step": 199800 }, { "epoch": 3.5079618673080635, "grad_norm": 0.05003810963086917, "learning_rate": 4.394764366511414e-05, "loss": 0.8478, "step": 199810 }, { "epoch": 3.508137432188065, "grad_norm": 0.05516782959138597, "learning_rate": 4.393853691739441e-05, "loss": 0.8521, "step": 199820 }, { "epoch": 3.5083129970680664, "grad_norm": 0.04949345414195553, "learning_rate": 4.3929430892654374e-05, "loss": 0.8374, "step": 199830 }, { "epoch": 3.508488561948068, "grad_norm": 0.06215372162539031, "learning_rate": 4.39203255910094e-05, "loss": 0.8352, "step": 199840 }, { "epoch": 3.5086641268280694, "grad_norm": 0.05024716155493693, "learning_rate": 4.391122101257476e-05, "loss": 0.8406, "step": 199850 }, { "epoch": 3.508839691708071, "grad_norm": 0.06830393332795988, "learning_rate": 4.390211715746596e-05, "loss": 0.8439, "step": 199860 }, { "epoch": 3.5090152565880723, "grad_norm": 0.0746347745019505, "learning_rate": 4.3893014025798245e-05, "loss": 0.844, "step": 199870 }, { "epoch": 3.5091908214680734, "grad_norm": 0.05255087471140886, "learning_rate": 4.3883911617687e-05, "loss": 0.846, "step": 199880 }, { "epoch": 3.509366386348075, "grad_norm": 0.04454381245598604, "learning_rate": 4.387480993324756e-05, "loss": 0.8404, "step": 199890 }, { "epoch": 3.5095419512280763, "grad_norm": 0.06482225269417662, "learning_rate": 4.3865708972595185e-05, "loss": 0.8421, "step": 199900 }, { "epoch": 3.509717516108078, "grad_norm": 0.04377953591739905, "learning_rate": 4.385660873584528e-05, "loss": 0.8463, "step": 199910 }, { "epoch": 3.5098930809880793, "grad_norm": 0.04745999480944986, "learning_rate": 4.384750922311302e-05, "loss": 0.8377, "step": 199920 }, { "epoch": 3.5100686458680803, "grad_norm": 0.06493829804113885, "learning_rate": 4.383841043451384e-05, "loss": 0.8346, "step": 199930 }, { "epoch": 3.510244210748082, "grad_norm": 0.0589265140652444, "learning_rate": 4.382931237016291e-05, "loss": 0.8368, "step": 199940 }, { "epoch": 3.5104197756280833, "grad_norm": 0.060380696158208166, "learning_rate": 4.382021503017555e-05, "loss": 0.8424, "step": 199950 }, { "epoch": 3.5105953405080847, "grad_norm": 0.050706940901943734, "learning_rate": 4.3811118414667074e-05, "loss": 0.8478, "step": 199960 }, { "epoch": 3.5107709053880862, "grad_norm": 0.04938966974334838, "learning_rate": 4.380202252375264e-05, "loss": 0.8377, "step": 199970 }, { "epoch": 3.5109464702680877, "grad_norm": 0.04991631325885726, "learning_rate": 4.379292735754755e-05, "loss": 0.8445, "step": 199980 }, { "epoch": 3.511122035148089, "grad_norm": 0.05063680576262859, "learning_rate": 4.378383291616703e-05, "loss": 0.8433, "step": 199990 }, { "epoch": 3.51129760002809, "grad_norm": 0.045921490217110464, "learning_rate": 4.377473919972634e-05, "loss": 0.8377, "step": 200000 }, { "epoch": 3.5114731649080917, "grad_norm": 0.060426711658194215, "learning_rate": 4.376564620834068e-05, "loss": 0.8436, "step": 200010 }, { "epoch": 3.511648729788093, "grad_norm": 0.07501624178548578, "learning_rate": 4.375655394212518e-05, "loss": 0.84, "step": 200020 }, { "epoch": 3.5118242946680946, "grad_norm": 0.051672644605706024, "learning_rate": 4.3747462401195194e-05, "loss": 0.835, "step": 200030 }, { "epoch": 3.511999859548096, "grad_norm": 0.04874787225489718, "learning_rate": 4.3738371585665775e-05, "loss": 0.8339, "step": 200040 }, { "epoch": 3.512175424428097, "grad_norm": 0.06404115252799217, "learning_rate": 4.3729281495652174e-05, "loss": 0.8392, "step": 200050 }, { "epoch": 3.512350989308099, "grad_norm": 0.05442809462692098, "learning_rate": 4.3720192131269595e-05, "loss": 0.8429, "step": 200060 }, { "epoch": 3.5125265541881, "grad_norm": 0.04703821381212055, "learning_rate": 4.371110349263311e-05, "loss": 0.8403, "step": 200070 }, { "epoch": 3.5127021190681016, "grad_norm": 0.07621777699175948, "learning_rate": 4.370201557985797e-05, "loss": 0.8358, "step": 200080 }, { "epoch": 3.512877683948103, "grad_norm": 0.04284788835023576, "learning_rate": 4.3692928393059195e-05, "loss": 0.8408, "step": 200090 }, { "epoch": 3.5130532488281045, "grad_norm": 0.061793403053717104, "learning_rate": 4.368384193235208e-05, "loss": 0.837, "step": 200100 }, { "epoch": 3.513228813708106, "grad_norm": 0.060153546628811484, "learning_rate": 4.3674756197851626e-05, "loss": 0.8325, "step": 200110 }, { "epoch": 3.513404378588107, "grad_norm": 0.06594220254423455, "learning_rate": 4.3665671189673e-05, "loss": 0.8427, "step": 200120 }, { "epoch": 3.5135799434681085, "grad_norm": 0.040698407574556894, "learning_rate": 4.365658690793134e-05, "loss": 0.8437, "step": 200130 }, { "epoch": 3.51375550834811, "grad_norm": 0.0631111114245094, "learning_rate": 4.364750335274169e-05, "loss": 0.841, "step": 200140 }, { "epoch": 3.5139310732281115, "grad_norm": 0.060637114329597314, "learning_rate": 4.363842052421915e-05, "loss": 0.8474, "step": 200150 }, { "epoch": 3.514106638108113, "grad_norm": 0.05096224853498794, "learning_rate": 4.3629338422478834e-05, "loss": 0.8384, "step": 200160 }, { "epoch": 3.514282202988114, "grad_norm": 0.06012695491238213, "learning_rate": 4.3620257047635814e-05, "loss": 0.8422, "step": 200170 }, { "epoch": 3.514457767868116, "grad_norm": 0.05557316116861647, "learning_rate": 4.3611176399805136e-05, "loss": 0.841, "step": 200180 }, { "epoch": 3.514633332748117, "grad_norm": 0.06299761939125324, "learning_rate": 4.360209647910179e-05, "loss": 0.8417, "step": 200190 }, { "epoch": 3.5148088976281184, "grad_norm": 0.048451978478948936, "learning_rate": 4.3593017285640944e-05, "loss": 0.8405, "step": 200200 }, { "epoch": 3.51498446250812, "grad_norm": 0.05208389474457796, "learning_rate": 4.358393881953753e-05, "loss": 0.8426, "step": 200210 }, { "epoch": 3.5151600273881214, "grad_norm": 0.05544420520321481, "learning_rate": 4.3574861080906624e-05, "loss": 0.8467, "step": 200220 }, { "epoch": 3.515335592268123, "grad_norm": 0.05076551425067777, "learning_rate": 4.3565784069863276e-05, "loss": 0.8412, "step": 200230 }, { "epoch": 3.515511157148124, "grad_norm": 0.051521259570557434, "learning_rate": 4.35567077865224e-05, "loss": 0.8475, "step": 200240 }, { "epoch": 3.5156867220281254, "grad_norm": 0.040401266873476824, "learning_rate": 4.354763223099909e-05, "loss": 0.8452, "step": 200250 }, { "epoch": 3.515862286908127, "grad_norm": 0.04688066282121088, "learning_rate": 4.353855740340822e-05, "loss": 0.8435, "step": 200260 }, { "epoch": 3.5160378517881283, "grad_norm": 0.049432269706259614, "learning_rate": 4.35294833038649e-05, "loss": 0.8386, "step": 200270 }, { "epoch": 3.51621341666813, "grad_norm": 0.0501292903133393, "learning_rate": 4.3520409932484025e-05, "loss": 0.8421, "step": 200280 }, { "epoch": 3.516388981548131, "grad_norm": 0.04284844601911539, "learning_rate": 4.351133728938055e-05, "loss": 0.8488, "step": 200290 }, { "epoch": 3.5165645464281328, "grad_norm": 0.051977273537785765, "learning_rate": 4.350226537466951e-05, "loss": 0.8345, "step": 200300 }, { "epoch": 3.516740111308134, "grad_norm": 0.047303334100046196, "learning_rate": 4.349319418846573e-05, "loss": 0.8357, "step": 200310 }, { "epoch": 3.5169156761881353, "grad_norm": 0.046097528262605, "learning_rate": 4.348412373088422e-05, "loss": 0.8384, "step": 200320 }, { "epoch": 3.5170912410681368, "grad_norm": 0.04962340780071639, "learning_rate": 4.347505400203987e-05, "loss": 0.8381, "step": 200330 }, { "epoch": 3.5172668059481382, "grad_norm": 0.05749064573442487, "learning_rate": 4.346598500204765e-05, "loss": 0.8416, "step": 200340 }, { "epoch": 3.5174423708281397, "grad_norm": 0.054715299091778406, "learning_rate": 4.345691673102242e-05, "loss": 0.8472, "step": 200350 }, { "epoch": 3.5176179357081407, "grad_norm": 0.050159142306578544, "learning_rate": 4.344784918907902e-05, "loss": 0.8377, "step": 200360 }, { "epoch": 3.517793500588142, "grad_norm": 0.07058592307034914, "learning_rate": 4.343878237633247e-05, "loss": 0.8415, "step": 200370 }, { "epoch": 3.5179690654681437, "grad_norm": 0.04998301831233742, "learning_rate": 4.342971629289754e-05, "loss": 0.8375, "step": 200380 }, { "epoch": 3.518144630348145, "grad_norm": 0.05460613765618193, "learning_rate": 4.3420650938889136e-05, "loss": 0.8356, "step": 200390 }, { "epoch": 3.5183201952281467, "grad_norm": 0.05154408626693823, "learning_rate": 4.3411586314422165e-05, "loss": 0.8337, "step": 200400 }, { "epoch": 3.518495760108148, "grad_norm": 0.048505066758184265, "learning_rate": 4.340252241961139e-05, "loss": 0.8447, "step": 200410 }, { "epoch": 3.5186713249881496, "grad_norm": 0.07605049429855563, "learning_rate": 4.339345925457173e-05, "loss": 0.8381, "step": 200420 }, { "epoch": 3.5188468898681506, "grad_norm": 0.046504410820231094, "learning_rate": 4.338439681941792e-05, "loss": 0.8388, "step": 200430 }, { "epoch": 3.519022454748152, "grad_norm": 0.06161400844388744, "learning_rate": 4.33753351142649e-05, "loss": 0.8383, "step": 200440 }, { "epoch": 3.5191980196281536, "grad_norm": 0.060346243684677246, "learning_rate": 4.3366274139227396e-05, "loss": 0.8379, "step": 200450 }, { "epoch": 3.519373584508155, "grad_norm": 0.05928492553013754, "learning_rate": 4.3357213894420254e-05, "loss": 0.8466, "step": 200460 }, { "epoch": 3.5195491493881566, "grad_norm": 0.05710179586020797, "learning_rate": 4.3348154379958286e-05, "loss": 0.8463, "step": 200470 }, { "epoch": 3.5197247142681576, "grad_norm": 0.06250548178000355, "learning_rate": 4.333909559595621e-05, "loss": 0.8386, "step": 200480 }, { "epoch": 3.519900279148159, "grad_norm": 0.051309685216415345, "learning_rate": 4.333003754252886e-05, "loss": 0.8386, "step": 200490 }, { "epoch": 3.5200758440281605, "grad_norm": 0.054221509151039994, "learning_rate": 4.3320980219790975e-05, "loss": 0.8415, "step": 200500 }, { "epoch": 3.520251408908162, "grad_norm": 0.06538182838555578, "learning_rate": 4.331192362785736e-05, "loss": 0.839, "step": 200510 }, { "epoch": 3.5204269737881635, "grad_norm": 0.04594577672926932, "learning_rate": 4.330286776684274e-05, "loss": 0.837, "step": 200520 }, { "epoch": 3.520602538668165, "grad_norm": 0.046746072361353176, "learning_rate": 4.329381263686176e-05, "loss": 0.8445, "step": 200530 }, { "epoch": 3.5207781035481664, "grad_norm": 0.043033327925548266, "learning_rate": 4.3284758238029304e-05, "loss": 0.8328, "step": 200540 }, { "epoch": 3.5209536684281675, "grad_norm": 0.04319302898493033, "learning_rate": 4.3275704570459994e-05, "loss": 0.8465, "step": 200550 }, { "epoch": 3.521129233308169, "grad_norm": 0.05351123324724292, "learning_rate": 4.326665163426856e-05, "loss": 0.8397, "step": 200560 }, { "epoch": 3.5213047981881704, "grad_norm": 0.0549013181792766, "learning_rate": 4.325759942956976e-05, "loss": 0.8469, "step": 200570 }, { "epoch": 3.521480363068172, "grad_norm": 0.0508505728736163, "learning_rate": 4.324854795647821e-05, "loss": 0.8432, "step": 200580 }, { "epoch": 3.5216559279481734, "grad_norm": 0.052093035184010185, "learning_rate": 4.323949721510865e-05, "loss": 0.8334, "step": 200590 }, { "epoch": 3.5218314928281744, "grad_norm": 0.06919383874189088, "learning_rate": 4.3230447205575656e-05, "loss": 0.8418, "step": 200600 }, { "epoch": 3.522007057708176, "grad_norm": 0.04945167602892831, "learning_rate": 4.322139792799405e-05, "loss": 0.8363, "step": 200610 }, { "epoch": 3.5221826225881774, "grad_norm": 0.05561150877150249, "learning_rate": 4.3212349382478365e-05, "loss": 0.8409, "step": 200620 }, { "epoch": 3.522358187468179, "grad_norm": 0.05708291841874271, "learning_rate": 4.320330156914329e-05, "loss": 0.8399, "step": 200630 }, { "epoch": 3.5225337523481803, "grad_norm": 0.04783558247722127, "learning_rate": 4.319425448810348e-05, "loss": 0.8412, "step": 200640 }, { "epoch": 3.522709317228182, "grad_norm": 0.06457255324371095, "learning_rate": 4.318520813947353e-05, "loss": 0.8388, "step": 200650 }, { "epoch": 3.5228848821081833, "grad_norm": 0.05053482786127865, "learning_rate": 4.317616252336807e-05, "loss": 0.8453, "step": 200660 }, { "epoch": 3.5230604469881843, "grad_norm": 0.058359960472974086, "learning_rate": 4.316711763990171e-05, "loss": 0.8463, "step": 200670 }, { "epoch": 3.523236011868186, "grad_norm": 0.04737950395158914, "learning_rate": 4.315807348918908e-05, "loss": 0.8431, "step": 200680 }, { "epoch": 3.5234115767481873, "grad_norm": 0.06340915772919703, "learning_rate": 4.314903007134476e-05, "loss": 0.843, "step": 200690 }, { "epoch": 3.5235871416281888, "grad_norm": 0.04536663239342469, "learning_rate": 4.3139987386483226e-05, "loss": 0.832, "step": 200700 }, { "epoch": 3.5237627065081902, "grad_norm": 0.056985546756060046, "learning_rate": 4.3130945434719223e-05, "loss": 0.8401, "step": 200710 }, { "epoch": 3.5239382713881913, "grad_norm": 0.04522381996816818, "learning_rate": 4.31219042161672e-05, "loss": 0.8372, "step": 200720 }, { "epoch": 3.5241138362681927, "grad_norm": 0.0525512118165931, "learning_rate": 4.311286373094174e-05, "loss": 0.8353, "step": 200730 }, { "epoch": 3.5242894011481942, "grad_norm": 0.043526638043746665, "learning_rate": 4.3103823979157424e-05, "loss": 0.8487, "step": 200740 }, { "epoch": 3.5244649660281957, "grad_norm": 0.06008058536811828, "learning_rate": 4.309478496092871e-05, "loss": 0.8426, "step": 200750 }, { "epoch": 3.524640530908197, "grad_norm": 0.05359132617709003, "learning_rate": 4.3085746676370214e-05, "loss": 0.8446, "step": 200760 }, { "epoch": 3.5248160957881987, "grad_norm": 0.04905551921850902, "learning_rate": 4.307670912559634e-05, "loss": 0.8447, "step": 200770 }, { "epoch": 3.5249916606682, "grad_norm": 0.06869321591238502, "learning_rate": 4.306767230872172e-05, "loss": 0.8353, "step": 200780 }, { "epoch": 3.525167225548201, "grad_norm": 0.042750199100669566, "learning_rate": 4.305863622586078e-05, "loss": 0.843, "step": 200790 }, { "epoch": 3.5253427904282026, "grad_norm": 0.05346122387441395, "learning_rate": 4.3049600877127996e-05, "loss": 0.8521, "step": 200800 }, { "epoch": 3.525518355308204, "grad_norm": 0.0528441221934692, "learning_rate": 4.304056626263793e-05, "loss": 0.8453, "step": 200810 }, { "epoch": 3.5256939201882056, "grad_norm": 0.04527935388183837, "learning_rate": 4.3031532382504965e-05, "loss": 0.8409, "step": 200820 }, { "epoch": 3.525869485068207, "grad_norm": 0.10367704298820406, "learning_rate": 4.302249923684357e-05, "loss": 0.8367, "step": 200830 }, { "epoch": 3.526045049948208, "grad_norm": 0.05866343978003809, "learning_rate": 4.3013466825768244e-05, "loss": 0.833, "step": 200840 }, { "epoch": 3.52622061482821, "grad_norm": 0.048623479220839295, "learning_rate": 4.300443514939344e-05, "loss": 0.8359, "step": 200850 }, { "epoch": 3.526396179708211, "grad_norm": 0.04864697716908669, "learning_rate": 4.299540420783354e-05, "loss": 0.846, "step": 200860 }, { "epoch": 3.5265717445882125, "grad_norm": 0.0527306001702434, "learning_rate": 4.298637400120294e-05, "loss": 0.8412, "step": 200870 }, { "epoch": 3.526747309468214, "grad_norm": 0.05436052382282177, "learning_rate": 4.2977344529616146e-05, "loss": 0.8373, "step": 200880 }, { "epoch": 3.5269228743482155, "grad_norm": 0.04838642063469154, "learning_rate": 4.29683157931875e-05, "loss": 0.8414, "step": 200890 }, { "epoch": 3.527098439228217, "grad_norm": 0.06933131659805908, "learning_rate": 4.2959287792031396e-05, "loss": 0.8342, "step": 200900 }, { "epoch": 3.527274004108218, "grad_norm": 0.0567725618347821, "learning_rate": 4.295026052626229e-05, "loss": 0.8463, "step": 200910 }, { "epoch": 3.5274495689882195, "grad_norm": 0.059354647316631017, "learning_rate": 4.294123399599447e-05, "loss": 0.8412, "step": 200920 }, { "epoch": 3.527625133868221, "grad_norm": 0.07152719936550288, "learning_rate": 4.293220820134238e-05, "loss": 0.8343, "step": 200930 }, { "epoch": 3.5278006987482224, "grad_norm": 0.06451670529074172, "learning_rate": 4.292318314242025e-05, "loss": 0.8469, "step": 200940 }, { "epoch": 3.527976263628224, "grad_norm": 0.054274443477392506, "learning_rate": 4.2914158819342615e-05, "loss": 0.8419, "step": 200950 }, { "epoch": 3.528151828508225, "grad_norm": 0.05370735960782895, "learning_rate": 4.290513523222368e-05, "loss": 0.846, "step": 200960 }, { "epoch": 3.528327393388227, "grad_norm": 0.046032293894312196, "learning_rate": 4.2896112381177807e-05, "loss": 0.8405, "step": 200970 }, { "epoch": 3.528502958268228, "grad_norm": 0.05418648322146241, "learning_rate": 4.2887090266319374e-05, "loss": 0.8367, "step": 200980 }, { "epoch": 3.5286785231482294, "grad_norm": 0.04953875351859977, "learning_rate": 4.28780688877626e-05, "loss": 0.8446, "step": 200990 }, { "epoch": 3.528854088028231, "grad_norm": 0.0562676623498435, "learning_rate": 4.2869048245621825e-05, "loss": 0.8397, "step": 201000 }, { "epoch": 3.5290296529082323, "grad_norm": 0.05450705292008742, "learning_rate": 4.286002834001136e-05, "loss": 0.8387, "step": 201010 }, { "epoch": 3.529205217788234, "grad_norm": 0.04344498216675427, "learning_rate": 4.285100917104551e-05, "loss": 0.8378, "step": 201020 }, { "epoch": 3.529380782668235, "grad_norm": 0.0518387944986638, "learning_rate": 4.2841990738838515e-05, "loss": 0.8398, "step": 201030 }, { "epoch": 3.5295563475482363, "grad_norm": 0.05343799360328918, "learning_rate": 4.283297304350458e-05, "loss": 0.8424, "step": 201040 }, { "epoch": 3.529731912428238, "grad_norm": 0.06737733149927795, "learning_rate": 4.282395608515809e-05, "loss": 0.8336, "step": 201050 }, { "epoch": 3.5299074773082393, "grad_norm": 0.0523217549803375, "learning_rate": 4.28149398639132e-05, "loss": 0.8419, "step": 201060 }, { "epoch": 3.5300830421882408, "grad_norm": 0.06014229952626238, "learning_rate": 4.2805924379884155e-05, "loss": 0.8501, "step": 201070 }, { "epoch": 3.530258607068242, "grad_norm": 0.04811005731577163, "learning_rate": 4.279690963318525e-05, "loss": 0.8507, "step": 201080 }, { "epoch": 3.5304341719482437, "grad_norm": 0.06211751577596833, "learning_rate": 4.278789562393062e-05, "loss": 0.8324, "step": 201090 }, { "epoch": 3.5306097368282447, "grad_norm": 0.04899702202597878, "learning_rate": 4.277888235223454e-05, "loss": 0.8423, "step": 201100 }, { "epoch": 3.5307853017082462, "grad_norm": 0.05288289677461504, "learning_rate": 4.27698698182111e-05, "loss": 0.8333, "step": 201110 }, { "epoch": 3.5309608665882477, "grad_norm": 0.04825875734468679, "learning_rate": 4.276085802197465e-05, "loss": 0.8355, "step": 201120 }, { "epoch": 3.531136431468249, "grad_norm": 0.054957521256606995, "learning_rate": 4.2751846963639255e-05, "loss": 0.84, "step": 201130 }, { "epoch": 3.5313119963482507, "grad_norm": 0.04586619716459698, "learning_rate": 4.274283664331912e-05, "loss": 0.8415, "step": 201140 }, { "epoch": 3.5314875612282517, "grad_norm": 0.06653702320531148, "learning_rate": 4.273382706112844e-05, "loss": 0.8384, "step": 201150 }, { "epoch": 3.531663126108253, "grad_norm": 0.061280756754452076, "learning_rate": 4.2724818217181305e-05, "loss": 0.8358, "step": 201160 }, { "epoch": 3.5318386909882546, "grad_norm": 0.06029525898069388, "learning_rate": 4.2715810111591904e-05, "loss": 0.8446, "step": 201170 }, { "epoch": 3.532014255868256, "grad_norm": 0.06006420745862922, "learning_rate": 4.270680274447434e-05, "loss": 0.8398, "step": 201180 }, { "epoch": 3.5321898207482576, "grad_norm": 0.056744105621414645, "learning_rate": 4.2697796115942804e-05, "loss": 0.8452, "step": 201190 }, { "epoch": 3.532365385628259, "grad_norm": 0.05840971198728881, "learning_rate": 4.2688790226111355e-05, "loss": 0.8429, "step": 201200 }, { "epoch": 3.5325409505082606, "grad_norm": 0.068777989934829, "learning_rate": 4.2679785075094055e-05, "loss": 0.84, "step": 201210 }, { "epoch": 3.5327165153882616, "grad_norm": 0.06460305610724093, "learning_rate": 4.267078066300511e-05, "loss": 0.8472, "step": 201220 }, { "epoch": 3.532892080268263, "grad_norm": 0.05761812687298573, "learning_rate": 4.266177698995852e-05, "loss": 0.8431, "step": 201230 }, { "epoch": 3.5330676451482645, "grad_norm": 0.0513825693913799, "learning_rate": 4.265277405606841e-05, "loss": 0.8399, "step": 201240 }, { "epoch": 3.533243210028266, "grad_norm": 0.07529745142997883, "learning_rate": 4.264377186144886e-05, "loss": 0.8384, "step": 201250 }, { "epoch": 3.5334187749082675, "grad_norm": 0.05740060929159923, "learning_rate": 4.263477040621387e-05, "loss": 0.8398, "step": 201260 }, { "epoch": 3.5335943397882685, "grad_norm": 0.06174356692341561, "learning_rate": 4.262576969047756e-05, "loss": 0.8546, "step": 201270 }, { "epoch": 3.53376990466827, "grad_norm": 0.05383807159638765, "learning_rate": 4.261676971435385e-05, "loss": 0.8423, "step": 201280 }, { "epoch": 3.5339454695482715, "grad_norm": 0.05526515147242702, "learning_rate": 4.2607770477956955e-05, "loss": 0.8429, "step": 201290 }, { "epoch": 3.534121034428273, "grad_norm": 0.06794025869132729, "learning_rate": 4.259877198140075e-05, "loss": 0.8392, "step": 201300 }, { "epoch": 3.5342965993082744, "grad_norm": 0.05390338530655235, "learning_rate": 4.258977422479929e-05, "loss": 0.8375, "step": 201310 }, { "epoch": 3.534472164188276, "grad_norm": 0.06232352863114447, "learning_rate": 4.258077720826663e-05, "loss": 0.842, "step": 201320 }, { "epoch": 3.5346477290682774, "grad_norm": 0.055822282410021996, "learning_rate": 4.257178093191668e-05, "loss": 0.8343, "step": 201330 }, { "epoch": 3.5348232939482784, "grad_norm": 0.058092520511197066, "learning_rate": 4.2562785395863454e-05, "loss": 0.8363, "step": 201340 }, { "epoch": 3.53499885882828, "grad_norm": 0.04435025589131343, "learning_rate": 4.2553790600220955e-05, "loss": 0.8486, "step": 201350 }, { "epoch": 3.5351744237082814, "grad_norm": 0.052657310697442485, "learning_rate": 4.254479654510315e-05, "loss": 0.8398, "step": 201360 }, { "epoch": 3.535349988588283, "grad_norm": 0.04899493858366757, "learning_rate": 4.253580323062398e-05, "loss": 0.8406, "step": 201370 }, { "epoch": 3.5355255534682843, "grad_norm": 0.05271448966053136, "learning_rate": 4.252681065689732e-05, "loss": 0.8335, "step": 201380 }, { "epoch": 3.5357011183482854, "grad_norm": 0.06390582867674015, "learning_rate": 4.251781882403723e-05, "loss": 0.8403, "step": 201390 }, { "epoch": 3.535876683228287, "grad_norm": 0.09197194866727099, "learning_rate": 4.2508827732157576e-05, "loss": 0.8336, "step": 201400 }, { "epoch": 3.5360522481082883, "grad_norm": 0.0482145070071272, "learning_rate": 4.249983738137227e-05, "loss": 0.8408, "step": 201410 }, { "epoch": 3.53622781298829, "grad_norm": 0.04504749963476472, "learning_rate": 4.249084777179528e-05, "loss": 0.8396, "step": 201420 }, { "epoch": 3.5364033778682913, "grad_norm": 0.04679672764023624, "learning_rate": 4.2481858903540415e-05, "loss": 0.8429, "step": 201430 }, { "epoch": 3.5365789427482928, "grad_norm": 0.06127377894028651, "learning_rate": 4.2472870776721646e-05, "loss": 0.8413, "step": 201440 }, { "epoch": 3.5367545076282942, "grad_norm": 0.06834177909302482, "learning_rate": 4.246388339145276e-05, "loss": 0.84, "step": 201450 }, { "epoch": 3.5369300725082953, "grad_norm": 0.06396679257824499, "learning_rate": 4.2454896747847756e-05, "loss": 0.8386, "step": 201460 }, { "epoch": 3.5371056373882968, "grad_norm": 0.05556740075290939, "learning_rate": 4.244591084602039e-05, "loss": 0.8397, "step": 201470 }, { "epoch": 3.5372812022682982, "grad_norm": 0.056128287259415946, "learning_rate": 4.2436925686084567e-05, "loss": 0.85, "step": 201480 }, { "epoch": 3.5374567671482997, "grad_norm": 0.05558697365864548, "learning_rate": 4.242794126815415e-05, "loss": 0.8384, "step": 201490 }, { "epoch": 3.537632332028301, "grad_norm": 0.051895609397885886, "learning_rate": 4.2418957592342904e-05, "loss": 0.8409, "step": 201500 }, { "epoch": 3.537807896908302, "grad_norm": 0.059973566189913514, "learning_rate": 4.240997465876469e-05, "loss": 0.8399, "step": 201510 }, { "epoch": 3.537983461788304, "grad_norm": 0.043842334543189004, "learning_rate": 4.2400992467533334e-05, "loss": 0.8403, "step": 201520 }, { "epoch": 3.538159026668305, "grad_norm": 0.0557468648781786, "learning_rate": 4.239201101876267e-05, "loss": 0.8443, "step": 201530 }, { "epoch": 3.5383345915483067, "grad_norm": 0.060151814973549905, "learning_rate": 4.238303031256645e-05, "loss": 0.8378, "step": 201540 }, { "epoch": 3.538510156428308, "grad_norm": 0.05657778645928298, "learning_rate": 4.237405034905841e-05, "loss": 0.837, "step": 201550 }, { "epoch": 3.5386857213083096, "grad_norm": 0.05572474609075534, "learning_rate": 4.236507112835245e-05, "loss": 0.8365, "step": 201560 }, { "epoch": 3.538861286188311, "grad_norm": 0.05921028226383846, "learning_rate": 4.235609265056224e-05, "loss": 0.8385, "step": 201570 }, { "epoch": 3.539036851068312, "grad_norm": 0.05317515894513794, "learning_rate": 4.234711491580158e-05, "loss": 0.8337, "step": 201580 }, { "epoch": 3.5392124159483136, "grad_norm": 0.05182159976608913, "learning_rate": 4.2338137924184233e-05, "loss": 0.8435, "step": 201590 }, { "epoch": 3.539387980828315, "grad_norm": 0.039810414390963444, "learning_rate": 4.232916167582391e-05, "loss": 0.8414, "step": 201600 }, { "epoch": 3.5395635457083165, "grad_norm": 0.05534585151638206, "learning_rate": 4.232018617083438e-05, "loss": 0.8396, "step": 201610 }, { "epoch": 3.539739110588318, "grad_norm": 0.052290184934389534, "learning_rate": 4.231121140932927e-05, "loss": 0.8413, "step": 201620 }, { "epoch": 3.539914675468319, "grad_norm": 0.054413821981248466, "learning_rate": 4.230223739142242e-05, "loss": 0.843, "step": 201630 }, { "epoch": 3.540090240348321, "grad_norm": 0.050725868030932215, "learning_rate": 4.2293264117227434e-05, "loss": 0.8348, "step": 201640 }, { "epoch": 3.540265805228322, "grad_norm": 0.05769677718067033, "learning_rate": 4.228429158685805e-05, "loss": 0.8379, "step": 201650 }, { "epoch": 3.5404413701083235, "grad_norm": 0.06771822867749121, "learning_rate": 4.227531980042798e-05, "loss": 0.8434, "step": 201660 }, { "epoch": 3.540616934988325, "grad_norm": 0.05753052573653131, "learning_rate": 4.2266348758050825e-05, "loss": 0.844, "step": 201670 }, { "epoch": 3.5407924998683264, "grad_norm": 0.055005529824736416, "learning_rate": 4.2257378459840295e-05, "loss": 0.8369, "step": 201680 }, { "epoch": 3.540968064748328, "grad_norm": 0.04997246293044959, "learning_rate": 4.224840890591003e-05, "loss": 0.8444, "step": 201690 }, { "epoch": 3.541143629628329, "grad_norm": 0.04140668048217736, "learning_rate": 4.223944009637372e-05, "loss": 0.8439, "step": 201700 }, { "epoch": 3.5413191945083304, "grad_norm": 0.05507477734404706, "learning_rate": 4.223047203134497e-05, "loss": 0.8424, "step": 201710 }, { "epoch": 3.541494759388332, "grad_norm": 0.061305070212260185, "learning_rate": 4.2221504710937324e-05, "loss": 0.8412, "step": 201720 }, { "epoch": 3.5416703242683334, "grad_norm": 0.0686124377756459, "learning_rate": 4.221253813526455e-05, "loss": 0.8353, "step": 201730 }, { "epoch": 3.541845889148335, "grad_norm": 0.07199832125226793, "learning_rate": 4.220357230444015e-05, "loss": 0.8403, "step": 201740 }, { "epoch": 3.542021454028336, "grad_norm": 0.0640870363474303, "learning_rate": 4.2194607218577763e-05, "loss": 0.8371, "step": 201750 }, { "epoch": 3.542197018908338, "grad_norm": 0.05442392487745125, "learning_rate": 4.218564287779101e-05, "loss": 0.8506, "step": 201760 }, { "epoch": 3.542372583788339, "grad_norm": 0.05143432806152093, "learning_rate": 4.217667928219339e-05, "loss": 0.8379, "step": 201770 }, { "epoch": 3.5425481486683403, "grad_norm": 0.0532560769852996, "learning_rate": 4.2167716431898546e-05, "loss": 0.8452, "step": 201780 }, { "epoch": 3.542723713548342, "grad_norm": 0.04958556734858083, "learning_rate": 4.2158754327019954e-05, "loss": 0.8459, "step": 201790 }, { "epoch": 3.5428992784283433, "grad_norm": 0.04723383673247859, "learning_rate": 4.214979296767128e-05, "loss": 0.8275, "step": 201800 }, { "epoch": 3.5430748433083448, "grad_norm": 0.05408189052213284, "learning_rate": 4.2140832353965984e-05, "loss": 0.8405, "step": 201810 }, { "epoch": 3.543250408188346, "grad_norm": 0.04572748951677873, "learning_rate": 4.213187248601762e-05, "loss": 0.8402, "step": 201820 }, { "epoch": 3.5434259730683473, "grad_norm": 0.05164368026723292, "learning_rate": 4.212291336393974e-05, "loss": 0.8406, "step": 201830 }, { "epoch": 3.5436015379483488, "grad_norm": 0.04233450146974177, "learning_rate": 4.21139549878458e-05, "loss": 0.8412, "step": 201840 }, { "epoch": 3.5437771028283502, "grad_norm": 0.04213621604336203, "learning_rate": 4.210499735784934e-05, "loss": 0.8342, "step": 201850 }, { "epoch": 3.5439526677083517, "grad_norm": 0.0528052708691953, "learning_rate": 4.209604047406385e-05, "loss": 0.8407, "step": 201860 }, { "epoch": 3.544128232588353, "grad_norm": 0.06032320249162949, "learning_rate": 4.2087084336602845e-05, "loss": 0.8411, "step": 201870 }, { "epoch": 3.5443037974683547, "grad_norm": 0.047129713590149065, "learning_rate": 4.2078128945579785e-05, "loss": 0.8412, "step": 201880 }, { "epoch": 3.5444793623483557, "grad_norm": 0.04553866619285527, "learning_rate": 4.206917430110804e-05, "loss": 0.8349, "step": 201890 }, { "epoch": 3.544654927228357, "grad_norm": 0.06492408687639692, "learning_rate": 4.206022040330123e-05, "loss": 0.8344, "step": 201900 }, { "epoch": 3.5448304921083587, "grad_norm": 0.05534338829534055, "learning_rate": 4.2051267252272675e-05, "loss": 0.84, "step": 201910 }, { "epoch": 3.54500605698836, "grad_norm": 0.04890774708605247, "learning_rate": 4.204231484813587e-05, "loss": 0.8549, "step": 201920 }, { "epoch": 3.5451816218683616, "grad_norm": 0.05011052805679417, "learning_rate": 4.203336319100428e-05, "loss": 0.8343, "step": 201930 }, { "epoch": 3.5453571867483626, "grad_norm": 0.07776652514246181, "learning_rate": 4.202441228099122e-05, "loss": 0.8404, "step": 201940 }, { "epoch": 3.545532751628364, "grad_norm": 0.03941805929930275, "learning_rate": 4.201546211821021e-05, "loss": 0.8407, "step": 201950 }, { "epoch": 3.5457083165083656, "grad_norm": 0.0582557028494856, "learning_rate": 4.200651270277454e-05, "loss": 0.8411, "step": 201960 }, { "epoch": 3.545883881388367, "grad_norm": 0.05569094157416411, "learning_rate": 4.199756403479771e-05, "loss": 0.8324, "step": 201970 }, { "epoch": 3.5460594462683686, "grad_norm": 0.050083648282018806, "learning_rate": 4.198861611439303e-05, "loss": 0.8374, "step": 201980 }, { "epoch": 3.54623501114837, "grad_norm": 0.05361188233807025, "learning_rate": 4.197966894167388e-05, "loss": 0.8413, "step": 201990 }, { "epoch": 3.5464105760283715, "grad_norm": 0.05144418915026631, "learning_rate": 4.197072251675369e-05, "loss": 0.8354, "step": 202000 }, { "epoch": 3.5465861409083725, "grad_norm": 0.06439262003378204, "learning_rate": 4.196177683974571e-05, "loss": 0.8429, "step": 202010 }, { "epoch": 3.546761705788374, "grad_norm": 0.05658721792952366, "learning_rate": 4.195283191076336e-05, "loss": 0.849, "step": 202020 }, { "epoch": 3.5469372706683755, "grad_norm": 0.058842674475488695, "learning_rate": 4.1943887729919896e-05, "loss": 0.842, "step": 202030 }, { "epoch": 3.547112835548377, "grad_norm": 0.04566019387411297, "learning_rate": 4.193494429732875e-05, "loss": 0.8408, "step": 202040 }, { "epoch": 3.5472884004283785, "grad_norm": 0.05562917604068131, "learning_rate": 4.192600161310318e-05, "loss": 0.8283, "step": 202050 }, { "epoch": 3.5474639653083795, "grad_norm": 0.049642852304689324, "learning_rate": 4.1917059677356425e-05, "loss": 0.8425, "step": 202060 }, { "epoch": 3.547639530188381, "grad_norm": 0.04900916811387846, "learning_rate": 4.1908118490201916e-05, "loss": 0.831, "step": 202070 }, { "epoch": 3.5478150950683824, "grad_norm": 0.051460572172552184, "learning_rate": 4.189917805175283e-05, "loss": 0.8453, "step": 202080 }, { "epoch": 3.547990659948384, "grad_norm": 0.05186559451715673, "learning_rate": 4.189023836212249e-05, "loss": 0.8336, "step": 202090 }, { "epoch": 3.5481662248283854, "grad_norm": 0.0835786660714101, "learning_rate": 4.188129942142419e-05, "loss": 0.8425, "step": 202100 }, { "epoch": 3.548341789708387, "grad_norm": 0.05708805642588019, "learning_rate": 4.187236122977113e-05, "loss": 0.8395, "step": 202110 }, { "epoch": 3.5485173545883884, "grad_norm": 0.05223368829452231, "learning_rate": 4.186342378727661e-05, "loss": 0.8457, "step": 202120 }, { "epoch": 3.5486929194683894, "grad_norm": 0.0712990768310104, "learning_rate": 4.185448709405378e-05, "loss": 0.8406, "step": 202130 }, { "epoch": 3.548868484348391, "grad_norm": 0.04459239130009099, "learning_rate": 4.1845551150216e-05, "loss": 0.8343, "step": 202140 }, { "epoch": 3.5490440492283923, "grad_norm": 0.06812340645276509, "learning_rate": 4.183661595587638e-05, "loss": 0.8363, "step": 202150 }, { "epoch": 3.549219614108394, "grad_norm": 0.050801589733177784, "learning_rate": 4.1827681511148186e-05, "loss": 0.8404, "step": 202160 }, { "epoch": 3.5493951789883953, "grad_norm": 0.0873724583652728, "learning_rate": 4.1818747816144646e-05, "loss": 0.8382, "step": 202170 }, { "epoch": 3.5495707438683963, "grad_norm": 0.0427164449413722, "learning_rate": 4.180981487097887e-05, "loss": 0.847, "step": 202180 }, { "epoch": 3.549746308748398, "grad_norm": 0.045833859169462775, "learning_rate": 4.1800882675764124e-05, "loss": 0.8401, "step": 202190 }, { "epoch": 3.5499218736283993, "grad_norm": 0.0596688177222984, "learning_rate": 4.1791951230613476e-05, "loss": 0.8314, "step": 202200 }, { "epoch": 3.5500974385084008, "grad_norm": 0.05856868314200318, "learning_rate": 4.178302053564021e-05, "loss": 0.8424, "step": 202210 }, { "epoch": 3.5502730033884022, "grad_norm": 0.07062133216357847, "learning_rate": 4.177409059095743e-05, "loss": 0.8365, "step": 202220 }, { "epoch": 3.5504485682684037, "grad_norm": 0.04275938854001201, "learning_rate": 4.17651613966782e-05, "loss": 0.8383, "step": 202230 }, { "epoch": 3.550624133148405, "grad_norm": 0.05672602918209526, "learning_rate": 4.1756232952915785e-05, "loss": 0.838, "step": 202240 }, { "epoch": 3.5507996980284062, "grad_norm": 0.045234960715627624, "learning_rate": 4.1747305259783236e-05, "loss": 0.8382, "step": 202250 }, { "epoch": 3.5509752629084077, "grad_norm": 0.04782335218777148, "learning_rate": 4.173837831739368e-05, "loss": 0.8385, "step": 202260 }, { "epoch": 3.551150827788409, "grad_norm": 0.05253011824523701, "learning_rate": 4.172945212586026e-05, "loss": 0.8469, "step": 202270 }, { "epoch": 3.5513263926684107, "grad_norm": 0.05578869191666809, "learning_rate": 4.172052668529601e-05, "loss": 0.8433, "step": 202280 }, { "epoch": 3.551501957548412, "grad_norm": 0.049992607179486725, "learning_rate": 4.171160199581408e-05, "loss": 0.8398, "step": 202290 }, { "epoch": 3.551677522428413, "grad_norm": 0.06656533147827975, "learning_rate": 4.1702678057527445e-05, "loss": 0.8443, "step": 202300 }, { "epoch": 3.551853087308415, "grad_norm": 0.045782272780197676, "learning_rate": 4.169375487054932e-05, "loss": 0.8432, "step": 202310 }, { "epoch": 3.552028652188416, "grad_norm": 0.05271271831102063, "learning_rate": 4.168483243499265e-05, "loss": 0.8405, "step": 202320 }, { "epoch": 3.5522042170684176, "grad_norm": 0.06386151959883696, "learning_rate": 4.1675910750970506e-05, "loss": 0.8472, "step": 202330 }, { "epoch": 3.552379781948419, "grad_norm": 0.05753717583141441, "learning_rate": 4.166698981859599e-05, "loss": 0.8428, "step": 202340 }, { "epoch": 3.5525553468284206, "grad_norm": 0.05829183750975896, "learning_rate": 4.1658069637982053e-05, "loss": 0.8354, "step": 202350 }, { "epoch": 3.552730911708422, "grad_norm": 0.04525614193209865, "learning_rate": 4.164915020924177e-05, "loss": 0.8356, "step": 202360 }, { "epoch": 3.552906476588423, "grad_norm": 0.0554474696914655, "learning_rate": 4.164023153248807e-05, "loss": 0.8418, "step": 202370 }, { "epoch": 3.5530820414684245, "grad_norm": 0.056123977958867304, "learning_rate": 4.163131360783407e-05, "loss": 0.8387, "step": 202380 }, { "epoch": 3.553257606348426, "grad_norm": 0.04682982716980217, "learning_rate": 4.162239643539272e-05, "loss": 0.8428, "step": 202390 }, { "epoch": 3.5534331712284275, "grad_norm": 0.056302710663250925, "learning_rate": 4.16134800152769e-05, "loss": 0.8443, "step": 202400 }, { "epoch": 3.553608736108429, "grad_norm": 0.05353204510118847, "learning_rate": 4.160456434759975e-05, "loss": 0.8396, "step": 202410 }, { "epoch": 3.55378430098843, "grad_norm": 0.043131618332352836, "learning_rate": 4.159564943247412e-05, "loss": 0.839, "step": 202420 }, { "epoch": 3.553959865868432, "grad_norm": 0.05099243352552083, "learning_rate": 4.1586735270012984e-05, "loss": 0.8477, "step": 202430 }, { "epoch": 3.554135430748433, "grad_norm": 0.05756520207757817, "learning_rate": 4.157782186032934e-05, "loss": 0.8395, "step": 202440 }, { "epoch": 3.5543109956284344, "grad_norm": 0.04849570403860387, "learning_rate": 4.1568909203536046e-05, "loss": 0.8358, "step": 202450 }, { "epoch": 3.554486560508436, "grad_norm": 0.07681580272349718, "learning_rate": 4.155999729974611e-05, "loss": 0.8446, "step": 202460 }, { "epoch": 3.5546621253884374, "grad_norm": 0.05484062510705913, "learning_rate": 4.155108614907231e-05, "loss": 0.8366, "step": 202470 }, { "epoch": 3.554837690268439, "grad_norm": 0.05692925950926333, "learning_rate": 4.154217575162772e-05, "loss": 0.8452, "step": 202480 }, { "epoch": 3.55501325514844, "grad_norm": 0.06384814514583219, "learning_rate": 4.153326610752513e-05, "loss": 0.8385, "step": 202490 }, { "epoch": 3.5551888200284414, "grad_norm": 0.059933017677313835, "learning_rate": 4.1524357216877454e-05, "loss": 0.8377, "step": 202500 }, { "epoch": 3.555364384908443, "grad_norm": 0.05190822824900191, "learning_rate": 4.151544907979759e-05, "loss": 0.8399, "step": 202510 }, { "epoch": 3.5555399497884443, "grad_norm": 0.05897556912746814, "learning_rate": 4.150654169639836e-05, "loss": 0.8371, "step": 202520 }, { "epoch": 3.555715514668446, "grad_norm": 0.055389475768358225, "learning_rate": 4.149763506679269e-05, "loss": 0.8414, "step": 202530 }, { "epoch": 3.555891079548447, "grad_norm": 0.06049300130611548, "learning_rate": 4.148872919109332e-05, "loss": 0.8409, "step": 202540 }, { "epoch": 3.5560666444284488, "grad_norm": 0.06258305308499724, "learning_rate": 4.1479824069413206e-05, "loss": 0.8342, "step": 202550 }, { "epoch": 3.55624220930845, "grad_norm": 0.04925570555700529, "learning_rate": 4.147091970186515e-05, "loss": 0.8469, "step": 202560 }, { "epoch": 3.5564177741884513, "grad_norm": 0.05494974008369435, "learning_rate": 4.146201608856187e-05, "loss": 0.8426, "step": 202570 }, { "epoch": 3.5565933390684528, "grad_norm": 0.04577489898155138, "learning_rate": 4.145311322961633e-05, "loss": 0.844, "step": 202580 }, { "epoch": 3.5567689039484542, "grad_norm": 0.055549322653693736, "learning_rate": 4.144421112514122e-05, "loss": 0.8392, "step": 202590 }, { "epoch": 3.5569444688284557, "grad_norm": 0.05304639264826229, "learning_rate": 4.143530977524936e-05, "loss": 0.8421, "step": 202600 }, { "epoch": 3.5571200337084568, "grad_norm": 0.062293863860261825, "learning_rate": 4.14264091800536e-05, "loss": 0.8407, "step": 202610 }, { "epoch": 3.5572955985884582, "grad_norm": 0.05422314390967318, "learning_rate": 4.14175093396666e-05, "loss": 0.8388, "step": 202620 }, { "epoch": 3.5574711634684597, "grad_norm": 0.054306372736674574, "learning_rate": 4.140861025420121e-05, "loss": 0.8431, "step": 202630 }, { "epoch": 3.557646728348461, "grad_norm": 0.06211548648470621, "learning_rate": 4.139971192377009e-05, "loss": 0.8412, "step": 202640 }, { "epoch": 3.5578222932284627, "grad_norm": 0.04950475590343682, "learning_rate": 4.13908143484861e-05, "loss": 0.8292, "step": 202650 }, { "epoch": 3.557997858108464, "grad_norm": 0.058567028664314374, "learning_rate": 4.138191752846188e-05, "loss": 0.8333, "step": 202660 }, { "epoch": 3.5581734229884656, "grad_norm": 0.07073776040709467, "learning_rate": 4.1373021463810206e-05, "loss": 0.8404, "step": 202670 }, { "epoch": 3.5583489878684667, "grad_norm": 0.04699363637424547, "learning_rate": 4.13641261546438e-05, "loss": 0.8385, "step": 202680 }, { "epoch": 3.558524552748468, "grad_norm": 0.04920509816073665, "learning_rate": 4.135523160107531e-05, "loss": 0.8406, "step": 202690 }, { "epoch": 3.5587001176284696, "grad_norm": 0.054469556173625554, "learning_rate": 4.1346337803217514e-05, "loss": 0.8457, "step": 202700 }, { "epoch": 3.558875682508471, "grad_norm": 0.047264725350148286, "learning_rate": 4.133744476118296e-05, "loss": 0.8453, "step": 202710 }, { "epoch": 3.5590512473884726, "grad_norm": 0.05935997797528456, "learning_rate": 4.1328552475084495e-05, "loss": 0.8525, "step": 202720 }, { "epoch": 3.5592268122684736, "grad_norm": 0.04822267649608381, "learning_rate": 4.131966094503471e-05, "loss": 0.834, "step": 202730 }, { "epoch": 3.559402377148475, "grad_norm": 0.05441704519615671, "learning_rate": 4.131077017114619e-05, "loss": 0.8444, "step": 202740 }, { "epoch": 3.5595779420284765, "grad_norm": 0.05563811759070384, "learning_rate": 4.1301880153531705e-05, "loss": 0.8441, "step": 202750 }, { "epoch": 3.559753506908478, "grad_norm": 0.09059775388365314, "learning_rate": 4.129299089230382e-05, "loss": 0.8431, "step": 202760 }, { "epoch": 3.5599290717884795, "grad_norm": 0.04860350001193106, "learning_rate": 4.128410238757518e-05, "loss": 0.8432, "step": 202770 }, { "epoch": 3.560104636668481, "grad_norm": 0.05666766351577844, "learning_rate": 4.127521463945844e-05, "loss": 0.8343, "step": 202780 }, { "epoch": 3.5602802015484825, "grad_norm": 0.04917878503774935, "learning_rate": 4.126632764806615e-05, "loss": 0.8378, "step": 202790 }, { "epoch": 3.5604557664284835, "grad_norm": 0.049211717103859555, "learning_rate": 4.125744141351097e-05, "loss": 0.8386, "step": 202800 }, { "epoch": 3.560631331308485, "grad_norm": 0.06303250754395291, "learning_rate": 4.124855593590537e-05, "loss": 0.839, "step": 202810 }, { "epoch": 3.5608068961884864, "grad_norm": 0.07174008101092028, "learning_rate": 4.123967121536211e-05, "loss": 0.8424, "step": 202820 }, { "epoch": 3.560982461068488, "grad_norm": 0.054384886721889916, "learning_rate": 4.1230787251993626e-05, "loss": 0.8364, "step": 202830 }, { "epoch": 3.5611580259484894, "grad_norm": 0.05212891562461495, "learning_rate": 4.1221904045912516e-05, "loss": 0.8456, "step": 202840 }, { "epoch": 3.5613335908284904, "grad_norm": 0.04443412864739048, "learning_rate": 4.1213021597231375e-05, "loss": 0.8458, "step": 202850 }, { "epoch": 3.561509155708492, "grad_norm": 0.05714945114234988, "learning_rate": 4.1204139906062674e-05, "loss": 0.8338, "step": 202860 }, { "epoch": 3.5616847205884934, "grad_norm": 0.049674807923311255, "learning_rate": 4.119525897251902e-05, "loss": 0.843, "step": 202870 }, { "epoch": 3.561860285468495, "grad_norm": 0.05106627280403374, "learning_rate": 4.118637879671283e-05, "loss": 0.8439, "step": 202880 }, { "epoch": 3.5620358503484963, "grad_norm": 0.07058066402984899, "learning_rate": 4.117749937875674e-05, "loss": 0.8432, "step": 202890 }, { "epoch": 3.562211415228498, "grad_norm": 0.04403428552019013, "learning_rate": 4.1168620718763196e-05, "loss": 0.8365, "step": 202900 }, { "epoch": 3.5623869801084993, "grad_norm": 0.08499784350790578, "learning_rate": 4.115974281684464e-05, "loss": 0.8413, "step": 202910 }, { "epoch": 3.5625625449885003, "grad_norm": 0.05276800048834633, "learning_rate": 4.1150865673113675e-05, "loss": 0.8335, "step": 202920 }, { "epoch": 3.562738109868502, "grad_norm": 0.06022356543608555, "learning_rate": 4.1141989287682675e-05, "loss": 0.8345, "step": 202930 }, { "epoch": 3.5629136747485033, "grad_norm": 0.043723620453777624, "learning_rate": 4.113311366066414e-05, "loss": 0.8446, "step": 202940 }, { "epoch": 3.5630892396285048, "grad_norm": 0.05023270590943099, "learning_rate": 4.1124238792170575e-05, "loss": 0.8377, "step": 202950 }, { "epoch": 3.5632648045085062, "grad_norm": 0.06960507605648897, "learning_rate": 4.111536468231433e-05, "loss": 0.8437, "step": 202960 }, { "epoch": 3.5634403693885073, "grad_norm": 0.05835610732478577, "learning_rate": 4.110649133120793e-05, "loss": 0.8376, "step": 202970 }, { "epoch": 3.563615934268509, "grad_norm": 0.0562142610590623, "learning_rate": 4.109761873896369e-05, "loss": 0.8425, "step": 202980 }, { "epoch": 3.5637914991485102, "grad_norm": 0.058810379577349386, "learning_rate": 4.1088746905694185e-05, "loss": 0.8454, "step": 202990 }, { "epoch": 3.5639670640285117, "grad_norm": 0.0552412548479127, "learning_rate": 4.1079875831511696e-05, "loss": 0.8401, "step": 203000 }, { "epoch": 3.564142628908513, "grad_norm": 0.05018610659422982, "learning_rate": 4.1071005516528654e-05, "loss": 0.8392, "step": 203010 }, { "epoch": 3.5643181937885147, "grad_norm": 0.048676245628602635, "learning_rate": 4.106213596085751e-05, "loss": 0.8408, "step": 203020 }, { "epoch": 3.564493758668516, "grad_norm": 0.043905972137325756, "learning_rate": 4.1053267164610545e-05, "loss": 0.8393, "step": 203030 }, { "epoch": 3.564669323548517, "grad_norm": 0.04926263508264267, "learning_rate": 4.104439912790021e-05, "loss": 0.8419, "step": 203040 }, { "epoch": 3.5648448884285187, "grad_norm": 0.0647272489065477, "learning_rate": 4.1035531850838756e-05, "loss": 0.844, "step": 203050 }, { "epoch": 3.56502045330852, "grad_norm": 0.052857568552382245, "learning_rate": 4.102666533353869e-05, "loss": 0.8431, "step": 203060 }, { "epoch": 3.5651960181885216, "grad_norm": 0.05514856923540912, "learning_rate": 4.101779957611225e-05, "loss": 0.8379, "step": 203070 }, { "epoch": 3.565371583068523, "grad_norm": 0.04719526684672021, "learning_rate": 4.100893457867172e-05, "loss": 0.8413, "step": 203080 }, { "epoch": 3.565547147948524, "grad_norm": 0.060341597954230004, "learning_rate": 4.100007034132956e-05, "loss": 0.8388, "step": 203090 }, { "epoch": 3.565722712828526, "grad_norm": 0.062367611970927446, "learning_rate": 4.0991206864197966e-05, "loss": 0.848, "step": 203100 }, { "epoch": 3.565898277708527, "grad_norm": 0.056058023677040374, "learning_rate": 4.098234414738929e-05, "loss": 0.8359, "step": 203110 }, { "epoch": 3.5660738425885286, "grad_norm": 0.04424588217058093, "learning_rate": 4.0973482191015844e-05, "loss": 0.8325, "step": 203120 }, { "epoch": 3.56624940746853, "grad_norm": 0.053134280099364946, "learning_rate": 4.096462099518986e-05, "loss": 0.8352, "step": 203130 }, { "epoch": 3.5664249723485315, "grad_norm": 0.05051734508317647, "learning_rate": 4.0955760560023655e-05, "loss": 0.8366, "step": 203140 }, { "epoch": 3.566600537228533, "grad_norm": 0.0657756387204699, "learning_rate": 4.0946900885629405e-05, "loss": 0.8392, "step": 203150 }, { "epoch": 3.566776102108534, "grad_norm": 0.06267971219693, "learning_rate": 4.09380419721195e-05, "loss": 0.8425, "step": 203160 }, { "epoch": 3.5669516669885355, "grad_norm": 0.05531373696262727, "learning_rate": 4.0929183819606076e-05, "loss": 0.8352, "step": 203170 }, { "epoch": 3.567127231868537, "grad_norm": 0.059137362197684694, "learning_rate": 4.092032642820141e-05, "loss": 0.8329, "step": 203180 }, { "epoch": 3.5673027967485385, "grad_norm": 0.04997041305769301, "learning_rate": 4.091146979801776e-05, "loss": 0.8373, "step": 203190 }, { "epoch": 3.56747836162854, "grad_norm": 0.05211164184956024, "learning_rate": 4.0902613929167265e-05, "loss": 0.844, "step": 203200 }, { "epoch": 3.567653926508541, "grad_norm": 0.05215929272867722, "learning_rate": 4.0893758821762204e-05, "loss": 0.839, "step": 203210 }, { "epoch": 3.567829491388543, "grad_norm": 0.05592047709582128, "learning_rate": 4.088490447591467e-05, "loss": 0.8407, "step": 203220 }, { "epoch": 3.568005056268544, "grad_norm": 0.04441345397438826, "learning_rate": 4.0876050891736984e-05, "loss": 0.8412, "step": 203230 }, { "epoch": 3.5681806211485454, "grad_norm": 0.0628030923881106, "learning_rate": 4.0867198069341256e-05, "loss": 0.8393, "step": 203240 }, { "epoch": 3.568356186028547, "grad_norm": 0.061767690831085534, "learning_rate": 4.085834600883959e-05, "loss": 0.8412, "step": 203250 }, { "epoch": 3.5685317509085484, "grad_norm": 0.05543397171612809, "learning_rate": 4.0849494710344265e-05, "loss": 0.8417, "step": 203260 }, { "epoch": 3.56870731578855, "grad_norm": 0.0473728266300272, "learning_rate": 4.0840644173967344e-05, "loss": 0.8415, "step": 203270 }, { "epoch": 3.568882880668551, "grad_norm": 0.05212631742208889, "learning_rate": 4.0831794399820986e-05, "loss": 0.8441, "step": 203280 }, { "epoch": 3.5690584455485523, "grad_norm": 0.05894944561356138, "learning_rate": 4.0822945388017365e-05, "loss": 0.8397, "step": 203290 }, { "epoch": 3.569234010428554, "grad_norm": 0.05168372537498453, "learning_rate": 4.0814097138668514e-05, "loss": 0.8453, "step": 203300 }, { "epoch": 3.5694095753085553, "grad_norm": 0.06392651958885176, "learning_rate": 4.080524965188663e-05, "loss": 0.8395, "step": 203310 }, { "epoch": 3.5695851401885568, "grad_norm": 0.05519640541424056, "learning_rate": 4.079640292778369e-05, "loss": 0.8366, "step": 203320 }, { "epoch": 3.5697607050685582, "grad_norm": 0.06269251959870974, "learning_rate": 4.0787556966471933e-05, "loss": 0.8494, "step": 203330 }, { "epoch": 3.5699362699485597, "grad_norm": 0.04676732525232947, "learning_rate": 4.077871176806335e-05, "loss": 0.8453, "step": 203340 }, { "epoch": 3.5701118348285608, "grad_norm": 0.05692040134585112, "learning_rate": 4.0769867332670015e-05, "loss": 0.8368, "step": 203350 }, { "epoch": 3.5702873997085622, "grad_norm": 0.05874838279287591, "learning_rate": 4.0761023660404045e-05, "loss": 0.848, "step": 203360 }, { "epoch": 3.5704629645885637, "grad_norm": 0.05318390010441011, "learning_rate": 4.075218075137742e-05, "loss": 0.8397, "step": 203370 }, { "epoch": 3.570638529468565, "grad_norm": 0.062118498454934126, "learning_rate": 4.074333860570223e-05, "loss": 0.841, "step": 203380 }, { "epoch": 3.5708140943485667, "grad_norm": 0.06786988413455702, "learning_rate": 4.0734497223490435e-05, "loss": 0.8432, "step": 203390 }, { "epoch": 3.5709896592285677, "grad_norm": 0.0472913827684817, "learning_rate": 4.072565660485418e-05, "loss": 0.8451, "step": 203400 }, { "epoch": 3.571165224108569, "grad_norm": 0.051010779853757245, "learning_rate": 4.071681674990539e-05, "loss": 0.8442, "step": 203410 }, { "epoch": 3.5713407889885707, "grad_norm": 0.05559682996026318, "learning_rate": 4.070797765875603e-05, "loss": 0.8442, "step": 203420 }, { "epoch": 3.571516353868572, "grad_norm": 0.05451190723504474, "learning_rate": 4.069913933151821e-05, "loss": 0.8464, "step": 203430 }, { "epoch": 3.5716919187485736, "grad_norm": 0.05223389657143302, "learning_rate": 4.069030176830382e-05, "loss": 0.8475, "step": 203440 }, { "epoch": 3.571867483628575, "grad_norm": 0.06565356224027362, "learning_rate": 4.068146496922487e-05, "loss": 0.8389, "step": 203450 }, { "epoch": 3.5720430485085766, "grad_norm": 0.056951755640559855, "learning_rate": 4.0672628934393355e-05, "loss": 0.8267, "step": 203460 }, { "epoch": 3.5722186133885776, "grad_norm": 0.042842283214547396, "learning_rate": 4.066379366392114e-05, "loss": 0.834, "step": 203470 }, { "epoch": 3.572394178268579, "grad_norm": 0.06392268482816076, "learning_rate": 4.065495915792028e-05, "loss": 0.8439, "step": 203480 }, { "epoch": 3.5725697431485806, "grad_norm": 0.054664258166242305, "learning_rate": 4.0646125416502565e-05, "loss": 0.8406, "step": 203490 }, { "epoch": 3.572745308028582, "grad_norm": 0.06577153496965708, "learning_rate": 4.063729243978008e-05, "loss": 0.8441, "step": 203500 }, { "epoch": 3.5729208729085835, "grad_norm": 0.048750923422640764, "learning_rate": 4.062846022786464e-05, "loss": 0.8404, "step": 203510 }, { "epoch": 3.5730964377885845, "grad_norm": 0.06080642207217739, "learning_rate": 4.0619628780868185e-05, "loss": 0.845, "step": 203520 }, { "epoch": 3.573272002668586, "grad_norm": 0.055595359858964584, "learning_rate": 4.061079809890262e-05, "loss": 0.8386, "step": 203530 }, { "epoch": 3.5734475675485875, "grad_norm": 0.05639471008767674, "learning_rate": 4.06019681820798e-05, "loss": 0.8422, "step": 203540 }, { "epoch": 3.573623132428589, "grad_norm": 0.047287642238419955, "learning_rate": 4.059313903051165e-05, "loss": 0.8388, "step": 203550 }, { "epoch": 3.5737986973085905, "grad_norm": 0.07907564200901977, "learning_rate": 4.0584310644309934e-05, "loss": 0.8353, "step": 203560 }, { "epoch": 3.573974262188592, "grad_norm": 0.06610837939658015, "learning_rate": 4.057548302358665e-05, "loss": 0.8347, "step": 203570 }, { "epoch": 3.5741498270685934, "grad_norm": 0.053973978678530585, "learning_rate": 4.0566656168453574e-05, "loss": 0.847, "step": 203580 }, { "epoch": 3.5743253919485944, "grad_norm": 0.051973333485142006, "learning_rate": 4.055783007902248e-05, "loss": 0.837, "step": 203590 }, { "epoch": 3.574500956828596, "grad_norm": 0.0702230287905264, "learning_rate": 4.0549004755405326e-05, "loss": 0.8336, "step": 203600 }, { "epoch": 3.5746765217085974, "grad_norm": 0.044784239291725994, "learning_rate": 4.054018019771384e-05, "loss": 0.8375, "step": 203610 }, { "epoch": 3.574852086588599, "grad_norm": 0.05503499102236917, "learning_rate": 4.053135640605985e-05, "loss": 0.8394, "step": 203620 }, { "epoch": 3.5750276514686004, "grad_norm": 0.055131560551788736, "learning_rate": 4.052253338055521e-05, "loss": 0.8363, "step": 203630 }, { "epoch": 3.5752032163486014, "grad_norm": 0.05005782045798044, "learning_rate": 4.051371112131161e-05, "loss": 0.8473, "step": 203640 }, { "epoch": 3.575378781228603, "grad_norm": 0.05750058981820543, "learning_rate": 4.0504889628440926e-05, "loss": 0.8405, "step": 203650 }, { "epoch": 3.5755543461086043, "grad_norm": 0.0719880241557586, "learning_rate": 4.0496068902054824e-05, "loss": 0.8374, "step": 203660 }, { "epoch": 3.575729910988606, "grad_norm": 0.059919521174012134, "learning_rate": 4.048724894226518e-05, "loss": 0.8389, "step": 203670 }, { "epoch": 3.5759054758686073, "grad_norm": 0.08135570151373045, "learning_rate": 4.047842974918366e-05, "loss": 0.8403, "step": 203680 }, { "epoch": 3.5760810407486088, "grad_norm": 0.05609337692813264, "learning_rate": 4.046961132292204e-05, "loss": 0.8454, "step": 203690 }, { "epoch": 3.5762566056286103, "grad_norm": 0.04714103623834301, "learning_rate": 4.046079366359207e-05, "loss": 0.841, "step": 203700 }, { "epoch": 3.5764321705086113, "grad_norm": 0.04653102130308885, "learning_rate": 4.0451976771305416e-05, "loss": 0.8438, "step": 203710 }, { "epoch": 3.5766077353886128, "grad_norm": 0.056402895055871656, "learning_rate": 4.044316064617385e-05, "loss": 0.8434, "step": 203720 }, { "epoch": 3.5767833002686142, "grad_norm": 0.06184846607343485, "learning_rate": 4.043434528830899e-05, "loss": 0.8458, "step": 203730 }, { "epoch": 3.5769588651486157, "grad_norm": 0.056935364931794324, "learning_rate": 4.042553069782264e-05, "loss": 0.8356, "step": 203740 }, { "epoch": 3.577134430028617, "grad_norm": 0.06603122215129295, "learning_rate": 4.041671687482643e-05, "loss": 0.8448, "step": 203750 }, { "epoch": 3.5773099949086182, "grad_norm": 0.055494467210946447, "learning_rate": 4.040790381943195e-05, "loss": 0.8318, "step": 203760 }, { "epoch": 3.57748555978862, "grad_norm": 0.04986798167969904, "learning_rate": 4.0399091531751014e-05, "loss": 0.8438, "step": 203770 }, { "epoch": 3.577661124668621, "grad_norm": 0.06696173068089792, "learning_rate": 4.039028001189517e-05, "loss": 0.8423, "step": 203780 }, { "epoch": 3.5778366895486227, "grad_norm": 0.05688598677536515, "learning_rate": 4.03814692599761e-05, "loss": 0.8451, "step": 203790 }, { "epoch": 3.578012254428624, "grad_norm": 0.04818910663596762, "learning_rate": 4.0372659276105455e-05, "loss": 0.8429, "step": 203800 }, { "epoch": 3.5781878193086256, "grad_norm": 0.044793666118685765, "learning_rate": 4.036385006039481e-05, "loss": 0.8395, "step": 203810 }, { "epoch": 3.578363384188627, "grad_norm": 0.06292232257896128, "learning_rate": 4.0355041612955834e-05, "loss": 0.8428, "step": 203820 }, { "epoch": 3.578538949068628, "grad_norm": 0.06377873030263963, "learning_rate": 4.034623393390004e-05, "loss": 0.8427, "step": 203830 }, { "epoch": 3.5787145139486296, "grad_norm": 0.05041851045295028, "learning_rate": 4.033742702333915e-05, "loss": 0.8331, "step": 203840 }, { "epoch": 3.578890078828631, "grad_norm": 0.050458019741886115, "learning_rate": 4.032862088138465e-05, "loss": 0.8444, "step": 203850 }, { "epoch": 3.5790656437086326, "grad_norm": 0.05887360930801073, "learning_rate": 4.0319815508148164e-05, "loss": 0.8371, "step": 203860 }, { "epoch": 3.579241208588634, "grad_norm": 0.050189886463340355, "learning_rate": 4.031101090374126e-05, "loss": 0.833, "step": 203870 }, { "epoch": 3.579416773468635, "grad_norm": 0.0525093874906584, "learning_rate": 4.0302207068275464e-05, "loss": 0.8388, "step": 203880 }, { "epoch": 3.579592338348637, "grad_norm": 0.05798603792512317, "learning_rate": 4.029340400186236e-05, "loss": 0.8412, "step": 203890 }, { "epoch": 3.579767903228638, "grad_norm": 0.06048998231733206, "learning_rate": 4.0284601704613403e-05, "loss": 0.8389, "step": 203900 }, { "epoch": 3.5799434681086395, "grad_norm": 0.06215780172567295, "learning_rate": 4.0275800176640235e-05, "loss": 0.8358, "step": 203910 }, { "epoch": 3.580119032988641, "grad_norm": 0.06516999417498093, "learning_rate": 4.026699941805432e-05, "loss": 0.8448, "step": 203920 }, { "epoch": 3.5802945978686425, "grad_norm": 0.04535283128208282, "learning_rate": 4.0258199428967094e-05, "loss": 0.845, "step": 203930 }, { "epoch": 3.580470162748644, "grad_norm": 0.04622808292374348, "learning_rate": 4.024940020949019e-05, "loss": 0.8433, "step": 203940 }, { "epoch": 3.580645727628645, "grad_norm": 0.04712096460423334, "learning_rate": 4.0240601759735e-05, "loss": 0.8521, "step": 203950 }, { "epoch": 3.5808212925086464, "grad_norm": 0.043223905466618695, "learning_rate": 4.023180407981302e-05, "loss": 0.8384, "step": 203960 }, { "epoch": 3.580996857388648, "grad_norm": 0.050162278922400286, "learning_rate": 4.022300716983576e-05, "loss": 0.8356, "step": 203970 }, { "epoch": 3.5811724222686494, "grad_norm": 0.058472085314676944, "learning_rate": 4.021421102991463e-05, "loss": 0.8412, "step": 203980 }, { "epoch": 3.581347987148651, "grad_norm": 0.053769121019810025, "learning_rate": 4.0205415660161115e-05, "loss": 0.8451, "step": 203990 }, { "epoch": 3.581523552028652, "grad_norm": 0.043511421998256056, "learning_rate": 4.0196621060686575e-05, "loss": 0.8448, "step": 204000 }, { "epoch": 3.581699116908654, "grad_norm": 0.05447542808550285, "learning_rate": 4.018782723160256e-05, "loss": 0.8409, "step": 204010 }, { "epoch": 3.581874681788655, "grad_norm": 0.0475065970265568, "learning_rate": 4.01790341730204e-05, "loss": 0.8373, "step": 204020 }, { "epoch": 3.5820502466686563, "grad_norm": 0.06647503068357817, "learning_rate": 4.017024188505154e-05, "loss": 0.8356, "step": 204030 }, { "epoch": 3.582225811548658, "grad_norm": 0.06291575588216783, "learning_rate": 4.016145036780739e-05, "loss": 0.8366, "step": 204040 }, { "epoch": 3.5824013764286593, "grad_norm": 0.05716841535008424, "learning_rate": 4.0152659621399295e-05, "loss": 0.8436, "step": 204050 }, { "epoch": 3.582576941308661, "grad_norm": 0.06972153520253196, "learning_rate": 4.014386964593871e-05, "loss": 0.8428, "step": 204060 }, { "epoch": 3.582752506188662, "grad_norm": 0.04489099078782428, "learning_rate": 4.013508044153688e-05, "loss": 0.8367, "step": 204070 }, { "epoch": 3.5829280710686633, "grad_norm": 0.0455980628800626, "learning_rate": 4.012629200830532e-05, "loss": 0.8515, "step": 204080 }, { "epoch": 3.5831036359486648, "grad_norm": 0.05969502702524924, "learning_rate": 4.0117504346355304e-05, "loss": 0.8417, "step": 204090 }, { "epoch": 3.5832792008286662, "grad_norm": 0.04379808265284023, "learning_rate": 4.01087174557981e-05, "loss": 0.8415, "step": 204100 }, { "epoch": 3.5834547657086677, "grad_norm": 0.056597865405464055, "learning_rate": 4.0099931336745196e-05, "loss": 0.8434, "step": 204110 }, { "epoch": 3.583630330588669, "grad_norm": 0.06699591871944781, "learning_rate": 4.0091145989307794e-05, "loss": 0.8335, "step": 204120 }, { "epoch": 3.5838058954686707, "grad_norm": 0.04555225835889251, "learning_rate": 4.0082361413597244e-05, "loss": 0.8371, "step": 204130 }, { "epoch": 3.5839814603486717, "grad_norm": 0.060080464240844025, "learning_rate": 4.0073577609724896e-05, "loss": 0.8377, "step": 204140 }, { "epoch": 3.584157025228673, "grad_norm": 0.04812101164596821, "learning_rate": 4.006479457780195e-05, "loss": 0.8457, "step": 204150 }, { "epoch": 3.5843325901086747, "grad_norm": 0.0489332553459495, "learning_rate": 4.005601231793977e-05, "loss": 0.8361, "step": 204160 }, { "epoch": 3.584508154988676, "grad_norm": 0.04835316200155569, "learning_rate": 4.004723083024952e-05, "loss": 0.8318, "step": 204170 }, { "epoch": 3.5846837198686776, "grad_norm": 0.16322349974429587, "learning_rate": 4.003845011484262e-05, "loss": 0.8477, "step": 204180 }, { "epoch": 3.5848592847486787, "grad_norm": 0.045349477365258024, "learning_rate": 4.002967017183019e-05, "loss": 0.85, "step": 204190 }, { "epoch": 3.58503484962868, "grad_norm": 0.051186666214025685, "learning_rate": 4.0020891001323544e-05, "loss": 0.842, "step": 204200 }, { "epoch": 3.5852104145086816, "grad_norm": 0.03750551568835322, "learning_rate": 4.001211260343391e-05, "loss": 0.8466, "step": 204210 }, { "epoch": 3.585385979388683, "grad_norm": 0.04556284580972863, "learning_rate": 4.0003334978272466e-05, "loss": 0.8399, "step": 204220 }, { "epoch": 3.5855615442686846, "grad_norm": 0.046865219660420246, "learning_rate": 3.9994558125950496e-05, "loss": 0.8429, "step": 204230 }, { "epoch": 3.585737109148686, "grad_norm": 0.04456512067272074, "learning_rate": 3.99857820465791e-05, "loss": 0.8413, "step": 204240 }, { "epoch": 3.5859126740286875, "grad_norm": 0.04926872852726842, "learning_rate": 3.997700674026961e-05, "loss": 0.8298, "step": 204250 }, { "epoch": 3.5860882389086886, "grad_norm": 0.0496480466769681, "learning_rate": 3.9968232207133134e-05, "loss": 0.8397, "step": 204260 }, { "epoch": 3.58626380378869, "grad_norm": 0.04821883830851315, "learning_rate": 3.995945844728078e-05, "loss": 0.8435, "step": 204270 }, { "epoch": 3.5864393686686915, "grad_norm": 0.05491757557960276, "learning_rate": 3.995068546082386e-05, "loss": 0.8407, "step": 204280 }, { "epoch": 3.586614933548693, "grad_norm": 0.05089817973005301, "learning_rate": 3.994191324787342e-05, "loss": 0.8334, "step": 204290 }, { "epoch": 3.5867904984286945, "grad_norm": 0.06961722959406363, "learning_rate": 3.9933141808540644e-05, "loss": 0.8442, "step": 204300 }, { "epoch": 3.5869660633086955, "grad_norm": 0.04321426585939357, "learning_rate": 3.9924371142936695e-05, "loss": 0.8417, "step": 204310 }, { "epoch": 3.587141628188697, "grad_norm": 0.0636543276861387, "learning_rate": 3.9915601251172635e-05, "loss": 0.8428, "step": 204320 }, { "epoch": 3.5873171930686985, "grad_norm": 0.05056435988226329, "learning_rate": 3.990683213335965e-05, "loss": 0.837, "step": 204330 }, { "epoch": 3.5874927579487, "grad_norm": 0.04109556600883279, "learning_rate": 3.9898063789608744e-05, "loss": 0.8433, "step": 204340 }, { "epoch": 3.5876683228287014, "grad_norm": 0.06119306234595807, "learning_rate": 3.988929622003115e-05, "loss": 0.8424, "step": 204350 }, { "epoch": 3.587843887708703, "grad_norm": 0.054764875019544634, "learning_rate": 3.988052942473784e-05, "loss": 0.846, "step": 204360 }, { "epoch": 3.5880194525887044, "grad_norm": 0.04655053785345462, "learning_rate": 3.987176340383994e-05, "loss": 0.8346, "step": 204370 }, { "epoch": 3.5881950174687054, "grad_norm": 0.055528761084133384, "learning_rate": 3.986299815744855e-05, "loss": 0.8422, "step": 204380 }, { "epoch": 3.588370582348707, "grad_norm": 0.046749292441046586, "learning_rate": 3.985423368567466e-05, "loss": 0.8396, "step": 204390 }, { "epoch": 3.5885461472287083, "grad_norm": 0.045128756945547965, "learning_rate": 3.984546998862937e-05, "loss": 0.8388, "step": 204400 }, { "epoch": 3.58872171210871, "grad_norm": 0.053612154672146285, "learning_rate": 3.983670706642364e-05, "loss": 0.843, "step": 204410 }, { "epoch": 3.5888972769887113, "grad_norm": 0.053299931509505016, "learning_rate": 3.982794491916861e-05, "loss": 0.8372, "step": 204420 }, { "epoch": 3.5890728418687123, "grad_norm": 0.04520492809169053, "learning_rate": 3.981918354697525e-05, "loss": 0.8427, "step": 204430 }, { "epoch": 3.5892484067487143, "grad_norm": 0.054514115105250824, "learning_rate": 3.9810422949954485e-05, "loss": 0.8357, "step": 204440 }, { "epoch": 3.5894239716287153, "grad_norm": 0.04930783690307168, "learning_rate": 3.9801663128217465e-05, "loss": 0.8398, "step": 204450 }, { "epoch": 3.5895995365087168, "grad_norm": 0.055539474788534327, "learning_rate": 3.979290408187505e-05, "loss": 0.8384, "step": 204460 }, { "epoch": 3.5897751013887182, "grad_norm": 0.06281783055519903, "learning_rate": 3.978414581103829e-05, "loss": 0.837, "step": 204470 }, { "epoch": 3.5899506662687197, "grad_norm": 0.06506606588206723, "learning_rate": 3.9775388315818165e-05, "loss": 0.8386, "step": 204480 }, { "epoch": 3.590126231148721, "grad_norm": 0.05005927545891067, "learning_rate": 3.976663159632556e-05, "loss": 0.8373, "step": 204490 }, { "epoch": 3.5903017960287222, "grad_norm": 0.05557592715492525, "learning_rate": 3.975787565267151e-05, "loss": 0.8321, "step": 204500 }, { "epoch": 3.5904773609087237, "grad_norm": 0.048668258672571164, "learning_rate": 3.974912048496684e-05, "loss": 0.8361, "step": 204510 }, { "epoch": 3.590652925788725, "grad_norm": 0.052751446044337666, "learning_rate": 3.9740366093322624e-05, "loss": 0.8364, "step": 204520 }, { "epoch": 3.5908284906687267, "grad_norm": 0.05998474394491238, "learning_rate": 3.973161247784968e-05, "loss": 0.842, "step": 204530 }, { "epoch": 3.591004055548728, "grad_norm": 0.04068300147267656, "learning_rate": 3.972285963865894e-05, "loss": 0.8447, "step": 204540 }, { "epoch": 3.591179620428729, "grad_norm": 0.04148912892458969, "learning_rate": 3.9714107575861345e-05, "loss": 0.8456, "step": 204550 }, { "epoch": 3.591355185308731, "grad_norm": 0.058109584524336205, "learning_rate": 3.970535628956773e-05, "loss": 0.8462, "step": 204560 }, { "epoch": 3.591530750188732, "grad_norm": 0.06208247715264872, "learning_rate": 3.969660577988902e-05, "loss": 0.8499, "step": 204570 }, { "epoch": 3.5917063150687336, "grad_norm": 0.060244207185258684, "learning_rate": 3.9687856046935996e-05, "loss": 0.8496, "step": 204580 }, { "epoch": 3.591881879948735, "grad_norm": 0.057014059319014716, "learning_rate": 3.967910709081966e-05, "loss": 0.8437, "step": 204590 }, { "epoch": 3.5920574448287366, "grad_norm": 0.05354175560781497, "learning_rate": 3.967035891165079e-05, "loss": 0.8365, "step": 204600 }, { "epoch": 3.592233009708738, "grad_norm": 0.04952497051423705, "learning_rate": 3.9661611509540154e-05, "loss": 0.8434, "step": 204610 }, { "epoch": 3.592408574588739, "grad_norm": 0.05199113265511496, "learning_rate": 3.9652864884598726e-05, "loss": 0.8423, "step": 204620 }, { "epoch": 3.5925841394687406, "grad_norm": 0.042752107604686465, "learning_rate": 3.964411903693722e-05, "loss": 0.8411, "step": 204630 }, { "epoch": 3.592759704348742, "grad_norm": 0.058396589090821016, "learning_rate": 3.963537396666648e-05, "loss": 0.8435, "step": 204640 }, { "epoch": 3.5929352692287435, "grad_norm": 0.04426060478742266, "learning_rate": 3.962662967389736e-05, "loss": 0.8428, "step": 204650 }, { "epoch": 3.593110834108745, "grad_norm": 0.06726764283460383, "learning_rate": 3.961788615874056e-05, "loss": 0.8375, "step": 204660 }, { "epoch": 3.593286398988746, "grad_norm": 0.06492381408433007, "learning_rate": 3.9609143421306935e-05, "loss": 0.8376, "step": 204670 }, { "epoch": 3.593461963868748, "grad_norm": 0.0635092876736587, "learning_rate": 3.9600401461707174e-05, "loss": 0.8374, "step": 204680 }, { "epoch": 3.593637528748749, "grad_norm": 0.05313179254238296, "learning_rate": 3.959166028005216e-05, "loss": 0.8445, "step": 204690 }, { "epoch": 3.5938130936287505, "grad_norm": 0.050107896142732644, "learning_rate": 3.9582919876452545e-05, "loss": 0.8432, "step": 204700 }, { "epoch": 3.593988658508752, "grad_norm": 0.0758501328126826, "learning_rate": 3.95741802510191e-05, "loss": 0.8445, "step": 204710 }, { "epoch": 3.5941642233887534, "grad_norm": 0.06156026046656243, "learning_rate": 3.9565441403862597e-05, "loss": 0.8393, "step": 204720 }, { "epoch": 3.594339788268755, "grad_norm": 0.05137165778247093, "learning_rate": 3.9556703335093696e-05, "loss": 0.8476, "step": 204730 }, { "epoch": 3.594515353148756, "grad_norm": 0.07064476975960243, "learning_rate": 3.954796604482316e-05, "loss": 0.8485, "step": 204740 }, { "epoch": 3.5946909180287574, "grad_norm": 0.061450855796457704, "learning_rate": 3.953922953316163e-05, "loss": 0.8434, "step": 204750 }, { "epoch": 3.594866482908759, "grad_norm": 0.05544320313533874, "learning_rate": 3.953049380021989e-05, "loss": 0.8428, "step": 204760 }, { "epoch": 3.5950420477887604, "grad_norm": 0.05421355594516983, "learning_rate": 3.952175884610858e-05, "loss": 0.8463, "step": 204770 }, { "epoch": 3.595217612668762, "grad_norm": 0.05017102184770904, "learning_rate": 3.95130246709383e-05, "loss": 0.8412, "step": 204780 }, { "epoch": 3.5953931775487633, "grad_norm": 0.04677073399474747, "learning_rate": 3.950429127481984e-05, "loss": 0.8404, "step": 204790 }, { "epoch": 3.595568742428765, "grad_norm": 0.050960541088465414, "learning_rate": 3.949555865786377e-05, "loss": 0.8382, "step": 204800 }, { "epoch": 3.595744307308766, "grad_norm": 0.061062939452717006, "learning_rate": 3.9486826820180757e-05, "loss": 0.8462, "step": 204810 }, { "epoch": 3.5959198721887673, "grad_norm": 0.051380066182515026, "learning_rate": 3.9478095761881474e-05, "loss": 0.8408, "step": 204820 }, { "epoch": 3.5960954370687688, "grad_norm": 0.06995943111900167, "learning_rate": 3.946936548307647e-05, "loss": 0.8499, "step": 204830 }, { "epoch": 3.5962710019487703, "grad_norm": 0.04575708256248788, "learning_rate": 3.9460635983876435e-05, "loss": 0.8409, "step": 204840 }, { "epoch": 3.5964465668287717, "grad_norm": 0.05365746305898582, "learning_rate": 3.9451907264391854e-05, "loss": 0.8343, "step": 204850 }, { "epoch": 3.5966221317087728, "grad_norm": 0.04537718928167727, "learning_rate": 3.9443179324733483e-05, "loss": 0.8434, "step": 204860 }, { "epoch": 3.5967976965887742, "grad_norm": 0.057417647591066885, "learning_rate": 3.9434452165011795e-05, "loss": 0.8434, "step": 204870 }, { "epoch": 3.5969732614687757, "grad_norm": 0.058430749169114116, "learning_rate": 3.94257257853374e-05, "loss": 0.8434, "step": 204880 }, { "epoch": 3.597148826348777, "grad_norm": 0.07077381702810077, "learning_rate": 3.941700018582089e-05, "loss": 0.8369, "step": 204890 }, { "epoch": 3.5973243912287787, "grad_norm": 0.05728535322418454, "learning_rate": 3.940827536657275e-05, "loss": 0.8447, "step": 204900 }, { "epoch": 3.59749995610878, "grad_norm": 0.03733040770720047, "learning_rate": 3.939955132770361e-05, "loss": 0.8439, "step": 204910 }, { "epoch": 3.5976755209887816, "grad_norm": 0.05676995733419339, "learning_rate": 3.939082806932389e-05, "loss": 0.8398, "step": 204920 }, { "epoch": 3.5978510858687827, "grad_norm": 0.05439654773710176, "learning_rate": 3.9382105591544247e-05, "loss": 0.8348, "step": 204930 }, { "epoch": 3.598026650748784, "grad_norm": 0.04746052300164268, "learning_rate": 3.937338389447513e-05, "loss": 0.8411, "step": 204940 }, { "epoch": 3.5982022156287856, "grad_norm": 0.048560135141762974, "learning_rate": 3.9364662978226996e-05, "loss": 0.8354, "step": 204950 }, { "epoch": 3.598377780508787, "grad_norm": 0.05303638706241667, "learning_rate": 3.935594284291046e-05, "loss": 0.8414, "step": 204960 }, { "epoch": 3.5985533453887886, "grad_norm": 0.04985022539138696, "learning_rate": 3.934722348863591e-05, "loss": 0.8496, "step": 204970 }, { "epoch": 3.5987289102687896, "grad_norm": 0.058668657152235244, "learning_rate": 3.933850491551385e-05, "loss": 0.8377, "step": 204980 }, { "epoch": 3.598904475148791, "grad_norm": 0.05125664206009921, "learning_rate": 3.9329787123654784e-05, "loss": 0.8378, "step": 204990 }, { "epoch": 3.5990800400287926, "grad_norm": 0.051398140372506916, "learning_rate": 3.93210701131691e-05, "loss": 0.8463, "step": 205000 }, { "epoch": 3.599255604908794, "grad_norm": 0.06055750173302328, "learning_rate": 3.931235388416731e-05, "loss": 0.8186, "step": 205010 }, { "epoch": 3.5994311697887955, "grad_norm": 0.05198755123260614, "learning_rate": 3.930363843675976e-05, "loss": 0.8421, "step": 205020 }, { "epoch": 3.599606734668797, "grad_norm": 0.043392092560392934, "learning_rate": 3.929492377105699e-05, "loss": 0.8356, "step": 205030 }, { "epoch": 3.5997822995487985, "grad_norm": 0.04287648588073483, "learning_rate": 3.928620988716933e-05, "loss": 0.8462, "step": 205040 }, { "epoch": 3.5999578644287995, "grad_norm": 0.06927978297844112, "learning_rate": 3.927749678520722e-05, "loss": 0.8398, "step": 205050 }, { "epoch": 3.600133429308801, "grad_norm": 0.06803504008897289, "learning_rate": 3.926878446528109e-05, "loss": 0.8464, "step": 205060 }, { "epoch": 3.6003089941888025, "grad_norm": 0.04358008714444811, "learning_rate": 3.9260072927501264e-05, "loss": 0.8326, "step": 205070 }, { "epoch": 3.600484559068804, "grad_norm": 0.052450348517060943, "learning_rate": 3.9251362171978166e-05, "loss": 0.8444, "step": 205080 }, { "epoch": 3.6006601239488054, "grad_norm": 0.05635574224695734, "learning_rate": 3.924265219882209e-05, "loss": 0.8353, "step": 205090 }, { "epoch": 3.6008356888288064, "grad_norm": 0.04643866577186989, "learning_rate": 3.923394300814351e-05, "loss": 0.8374, "step": 205100 }, { "epoch": 3.601011253708808, "grad_norm": 0.04687123181143815, "learning_rate": 3.92252346000527e-05, "loss": 0.8415, "step": 205110 }, { "epoch": 3.6011868185888094, "grad_norm": 0.07381795863738276, "learning_rate": 3.921652697465995e-05, "loss": 0.8401, "step": 205120 }, { "epoch": 3.601362383468811, "grad_norm": 0.044798046763569944, "learning_rate": 3.9207820132075715e-05, "loss": 0.8382, "step": 205130 }, { "epoch": 3.6015379483488124, "grad_norm": 0.04561657906502529, "learning_rate": 3.919911407241019e-05, "loss": 0.8496, "step": 205140 }, { "epoch": 3.601713513228814, "grad_norm": 0.07555048640121428, "learning_rate": 3.919040879577375e-05, "loss": 0.837, "step": 205150 }, { "epoch": 3.6018890781088153, "grad_norm": 0.05339499803919997, "learning_rate": 3.91817043022767e-05, "loss": 0.8272, "step": 205160 }, { "epoch": 3.6020646429888163, "grad_norm": 0.05081296803064304, "learning_rate": 3.917300059202928e-05, "loss": 0.8349, "step": 205170 }, { "epoch": 3.602240207868818, "grad_norm": 0.05411707996630344, "learning_rate": 3.9164297665141816e-05, "loss": 0.8406, "step": 205180 }, { "epoch": 3.6024157727488193, "grad_norm": 0.04953020559180365, "learning_rate": 3.915559552172449e-05, "loss": 0.8426, "step": 205190 }, { "epoch": 3.6025913376288208, "grad_norm": 0.04799327735216028, "learning_rate": 3.914689416188769e-05, "loss": 0.8362, "step": 205200 }, { "epoch": 3.6027669025088223, "grad_norm": 0.05077452688504373, "learning_rate": 3.913819358574155e-05, "loss": 0.8492, "step": 205210 }, { "epoch": 3.6029424673888233, "grad_norm": 0.049182278042932875, "learning_rate": 3.912949379339637e-05, "loss": 0.8356, "step": 205220 }, { "epoch": 3.603118032268825, "grad_norm": 0.04472791617996965, "learning_rate": 3.912079478496238e-05, "loss": 0.8284, "step": 205230 }, { "epoch": 3.6032935971488262, "grad_norm": 0.052217825150400084, "learning_rate": 3.9112096560549745e-05, "loss": 0.8348, "step": 205240 }, { "epoch": 3.6034691620288277, "grad_norm": 0.05421949937660823, "learning_rate": 3.910339912026876e-05, "loss": 0.836, "step": 205250 }, { "epoch": 3.603644726908829, "grad_norm": 0.05926507954741897, "learning_rate": 3.909470246422948e-05, "loss": 0.8364, "step": 205260 }, { "epoch": 3.6038202917888307, "grad_norm": 0.07183488476387236, "learning_rate": 3.9086006592542265e-05, "loss": 0.8471, "step": 205270 }, { "epoch": 3.603995856668832, "grad_norm": 0.04545244485283583, "learning_rate": 3.907731150531721e-05, "loss": 0.8403, "step": 205280 }, { "epoch": 3.604171421548833, "grad_norm": 0.04845101775753622, "learning_rate": 3.906861720266441e-05, "loss": 0.845, "step": 205290 }, { "epoch": 3.6043469864288347, "grad_norm": 0.056701221906107026, "learning_rate": 3.905992368469418e-05, "loss": 0.8407, "step": 205300 }, { "epoch": 3.604522551308836, "grad_norm": 0.057408701242166, "learning_rate": 3.905123095151654e-05, "loss": 0.8394, "step": 205310 }, { "epoch": 3.6046981161888376, "grad_norm": 0.04621694729443932, "learning_rate": 3.904253900324169e-05, "loss": 0.8489, "step": 205320 }, { "epoch": 3.604873681068839, "grad_norm": 0.046230276568850384, "learning_rate": 3.903384783997977e-05, "loss": 0.8401, "step": 205330 }, { "epoch": 3.60504924594884, "grad_norm": 0.06264349765374169, "learning_rate": 3.902515746184085e-05, "loss": 0.841, "step": 205340 }, { "epoch": 3.605224810828842, "grad_norm": 0.05582212017999714, "learning_rate": 3.901646786893508e-05, "loss": 0.847, "step": 205350 }, { "epoch": 3.605400375708843, "grad_norm": 0.05520935873241114, "learning_rate": 3.900777906137248e-05, "loss": 0.8401, "step": 205360 }, { "epoch": 3.6055759405888446, "grad_norm": 0.054035228331527074, "learning_rate": 3.899909103926327e-05, "loss": 0.8435, "step": 205370 }, { "epoch": 3.605751505468846, "grad_norm": 0.04712151243625597, "learning_rate": 3.899040380271743e-05, "loss": 0.8454, "step": 205380 }, { "epoch": 3.6059270703488475, "grad_norm": 0.05634423836593053, "learning_rate": 3.898171735184505e-05, "loss": 0.8423, "step": 205390 }, { "epoch": 3.606102635228849, "grad_norm": 0.05033326573343576, "learning_rate": 3.897303168675624e-05, "loss": 0.8441, "step": 205400 }, { "epoch": 3.60627820010885, "grad_norm": 0.05956989644420946, "learning_rate": 3.896434680756096e-05, "loss": 0.8402, "step": 205410 }, { "epoch": 3.6064537649888515, "grad_norm": 0.08873274419657348, "learning_rate": 3.895566271436934e-05, "loss": 0.8415, "step": 205420 }, { "epoch": 3.606629329868853, "grad_norm": 0.05680151411637027, "learning_rate": 3.894697940729129e-05, "loss": 0.8372, "step": 205430 }, { "epoch": 3.6068048947488545, "grad_norm": 0.05747579759280873, "learning_rate": 3.893829688643696e-05, "loss": 0.844, "step": 205440 }, { "epoch": 3.606980459628856, "grad_norm": 0.06372835148996667, "learning_rate": 3.892961515191633e-05, "loss": 0.8415, "step": 205450 }, { "epoch": 3.607156024508857, "grad_norm": 0.05222856610704827, "learning_rate": 3.892093420383928e-05, "loss": 0.8477, "step": 205460 }, { "epoch": 3.607331589388859, "grad_norm": 0.07088707315735987, "learning_rate": 3.891225404231597e-05, "loss": 0.8516, "step": 205470 }, { "epoch": 3.60750715426886, "grad_norm": 0.04989059129260786, "learning_rate": 3.8903574667456264e-05, "loss": 0.8418, "step": 205480 }, { "epoch": 3.6076827191488614, "grad_norm": 0.05327714673891411, "learning_rate": 3.889489607937017e-05, "loss": 0.8427, "step": 205490 }, { "epoch": 3.607858284028863, "grad_norm": 0.06067340965083805, "learning_rate": 3.8886218278167676e-05, "loss": 0.8457, "step": 205500 }, { "epoch": 3.6080338489088644, "grad_norm": 0.07339359310357513, "learning_rate": 3.887754126395866e-05, "loss": 0.8392, "step": 205510 }, { "epoch": 3.608209413788866, "grad_norm": 0.05268136270198482, "learning_rate": 3.8868865036853144e-05, "loss": 0.8408, "step": 205520 }, { "epoch": 3.608384978668867, "grad_norm": 0.04622489179103295, "learning_rate": 3.886018959696095e-05, "loss": 0.8418, "step": 205530 }, { "epoch": 3.6085605435488683, "grad_norm": 0.05559462171428013, "learning_rate": 3.8851514944392124e-05, "loss": 0.8378, "step": 205540 }, { "epoch": 3.60873610842887, "grad_norm": 0.05120516176971408, "learning_rate": 3.884284107925649e-05, "loss": 0.8522, "step": 205550 }, { "epoch": 3.6089116733088713, "grad_norm": 0.052227965172714295, "learning_rate": 3.883416800166396e-05, "loss": 0.8405, "step": 205560 }, { "epoch": 3.609087238188873, "grad_norm": 0.05336247145481215, "learning_rate": 3.882549571172447e-05, "loss": 0.845, "step": 205570 }, { "epoch": 3.6092628030688743, "grad_norm": 0.04634123985089257, "learning_rate": 3.881682420954784e-05, "loss": 0.8333, "step": 205580 }, { "epoch": 3.6094383679488757, "grad_norm": 0.053583393390799754, "learning_rate": 3.880815349524399e-05, "loss": 0.8388, "step": 205590 }, { "epoch": 3.6096139328288768, "grad_norm": 0.049951565724785804, "learning_rate": 3.8799483568922697e-05, "loss": 0.8392, "step": 205600 }, { "epoch": 3.6097894977088782, "grad_norm": 0.06183337513198959, "learning_rate": 3.8790814430693926e-05, "loss": 0.84, "step": 205610 }, { "epoch": 3.6099650625888797, "grad_norm": 0.05833007666109794, "learning_rate": 3.878214608066747e-05, "loss": 0.8491, "step": 205620 }, { "epoch": 3.610140627468881, "grad_norm": 0.054275177841680775, "learning_rate": 3.877347851895307e-05, "loss": 0.8444, "step": 205630 }, { "epoch": 3.6103161923488827, "grad_norm": 0.055208003051766395, "learning_rate": 3.876481174566069e-05, "loss": 0.836, "step": 205640 }, { "epoch": 3.6104917572288837, "grad_norm": 0.0435465960383356, "learning_rate": 3.875614576090004e-05, "loss": 0.842, "step": 205650 }, { "epoch": 3.610667322108885, "grad_norm": 0.053880028810798855, "learning_rate": 3.874748056478095e-05, "loss": 0.8382, "step": 205660 }, { "epoch": 3.6108428869888867, "grad_norm": 0.052297215929060024, "learning_rate": 3.8738816157413246e-05, "loss": 0.8393, "step": 205670 }, { "epoch": 3.611018451868888, "grad_norm": 0.06195818724401174, "learning_rate": 3.873015253890665e-05, "loss": 0.8333, "step": 205680 }, { "epoch": 3.6111940167488896, "grad_norm": 0.047339024016607396, "learning_rate": 3.872148970937098e-05, "loss": 0.8373, "step": 205690 }, { "epoch": 3.611369581628891, "grad_norm": 0.05458551659256949, "learning_rate": 3.87128276689159e-05, "loss": 0.8385, "step": 205700 }, { "epoch": 3.6115451465088926, "grad_norm": 0.05482436318924736, "learning_rate": 3.8704166417651314e-05, "loss": 0.8361, "step": 205710 }, { "epoch": 3.6117207113888936, "grad_norm": 0.05054892788381928, "learning_rate": 3.869550595568682e-05, "loss": 0.8433, "step": 205720 }, { "epoch": 3.611896276268895, "grad_norm": 0.06021391401244948, "learning_rate": 3.868684628313223e-05, "loss": 0.8441, "step": 205730 }, { "epoch": 3.6120718411488966, "grad_norm": 0.07310333702765937, "learning_rate": 3.867818740009726e-05, "loss": 0.8405, "step": 205740 }, { "epoch": 3.612247406028898, "grad_norm": 0.05681457612712172, "learning_rate": 3.866952930669157e-05, "loss": 0.8384, "step": 205750 }, { "epoch": 3.6124229709088995, "grad_norm": 0.047820006658318946, "learning_rate": 3.8660872003024915e-05, "loss": 0.8336, "step": 205760 }, { "epoch": 3.6125985357889006, "grad_norm": 0.06429024545419013, "learning_rate": 3.86522154892069e-05, "loss": 0.8376, "step": 205770 }, { "epoch": 3.612774100668902, "grad_norm": 0.0516275166386642, "learning_rate": 3.864355976534734e-05, "loss": 0.8342, "step": 205780 }, { "epoch": 3.6129496655489035, "grad_norm": 0.049801170651265415, "learning_rate": 3.8634904831555815e-05, "loss": 0.8352, "step": 205790 }, { "epoch": 3.613125230428905, "grad_norm": 0.05029178258873204, "learning_rate": 3.8626250687941935e-05, "loss": 0.8287, "step": 205800 }, { "epoch": 3.6133007953089065, "grad_norm": 0.051623569774772164, "learning_rate": 3.8617597334615466e-05, "loss": 0.8389, "step": 205810 }, { "epoch": 3.613476360188908, "grad_norm": 0.05544721158993487, "learning_rate": 3.860894477168598e-05, "loss": 0.8398, "step": 205820 }, { "epoch": 3.6136519250689094, "grad_norm": 0.047565347582713816, "learning_rate": 3.860029299926311e-05, "loss": 0.8438, "step": 205830 }, { "epoch": 3.6138274899489105, "grad_norm": 0.061203683609826025, "learning_rate": 3.859164201745652e-05, "loss": 0.8353, "step": 205840 }, { "epoch": 3.614003054828912, "grad_norm": 0.05332895454644122, "learning_rate": 3.858299182637577e-05, "loss": 0.8416, "step": 205850 }, { "epoch": 3.6141786197089134, "grad_norm": 0.04389786701703956, "learning_rate": 3.8574342426130494e-05, "loss": 0.8444, "step": 205860 }, { "epoch": 3.614354184588915, "grad_norm": 0.06383018709991686, "learning_rate": 3.85656938168302e-05, "loss": 0.8347, "step": 205870 }, { "epoch": 3.6145297494689164, "grad_norm": 0.04782890198088906, "learning_rate": 3.855704599858459e-05, "loss": 0.8315, "step": 205880 }, { "epoch": 3.6147053143489174, "grad_norm": 0.04715849153760157, "learning_rate": 3.854839897150315e-05, "loss": 0.8348, "step": 205890 }, { "epoch": 3.6148808792289193, "grad_norm": 0.04774392650030874, "learning_rate": 3.8539752735695456e-05, "loss": 0.8443, "step": 205900 }, { "epoch": 3.6150564441089204, "grad_norm": 0.044066630434360765, "learning_rate": 3.853110729127111e-05, "loss": 0.829, "step": 205910 }, { "epoch": 3.615232008988922, "grad_norm": 0.053614839788542895, "learning_rate": 3.8522462638339566e-05, "loss": 0.8418, "step": 205920 }, { "epoch": 3.6154075738689233, "grad_norm": 0.04564620893872445, "learning_rate": 3.851381877701043e-05, "loss": 0.8342, "step": 205930 }, { "epoch": 3.615583138748925, "grad_norm": 0.0532526513338837, "learning_rate": 3.850517570739312e-05, "loss": 0.8469, "step": 205940 }, { "epoch": 3.6157587036289263, "grad_norm": 0.05385917546445037, "learning_rate": 3.849653342959727e-05, "loss": 0.847, "step": 205950 }, { "epoch": 3.6159342685089273, "grad_norm": 0.07133877523512872, "learning_rate": 3.848789194373233e-05, "loss": 0.8427, "step": 205960 }, { "epoch": 3.6161098333889288, "grad_norm": 0.04304159778462057, "learning_rate": 3.847925124990771e-05, "loss": 0.8368, "step": 205970 }, { "epoch": 3.6162853982689303, "grad_norm": 0.05969400719428752, "learning_rate": 3.847061134823302e-05, "loss": 0.8377, "step": 205980 }, { "epoch": 3.6164609631489317, "grad_norm": 0.04950107243832494, "learning_rate": 3.846197223881764e-05, "loss": 0.8354, "step": 205990 }, { "epoch": 3.616636528028933, "grad_norm": 0.04812405896738595, "learning_rate": 3.845333392177106e-05, "loss": 0.8413, "step": 206000 }, { "epoch": 3.6168120929089342, "grad_norm": 0.044200551724439055, "learning_rate": 3.844469639720275e-05, "loss": 0.8418, "step": 206010 }, { "epoch": 3.616987657788936, "grad_norm": 0.06081836588899363, "learning_rate": 3.843605966522209e-05, "loss": 0.8431, "step": 206020 }, { "epoch": 3.617163222668937, "grad_norm": 0.051067786115450355, "learning_rate": 3.842742372593859e-05, "loss": 0.8436, "step": 206030 }, { "epoch": 3.6173387875489387, "grad_norm": 0.053376288577542695, "learning_rate": 3.8418788579461554e-05, "loss": 0.8381, "step": 206040 }, { "epoch": 3.61751435242894, "grad_norm": 0.05130427154109696, "learning_rate": 3.841015422590052e-05, "loss": 0.845, "step": 206050 }, { "epoch": 3.6176899173089416, "grad_norm": 0.04093502558692911, "learning_rate": 3.840152066536479e-05, "loss": 0.8491, "step": 206060 }, { "epoch": 3.617865482188943, "grad_norm": 0.07628326013346551, "learning_rate": 3.8392887897963806e-05, "loss": 0.8395, "step": 206070 }, { "epoch": 3.618041047068944, "grad_norm": 0.05518679821728958, "learning_rate": 3.838425592380695e-05, "loss": 0.8424, "step": 206080 }, { "epoch": 3.6182166119489456, "grad_norm": 0.0670995177317251, "learning_rate": 3.837562474300355e-05, "loss": 0.8314, "step": 206090 }, { "epoch": 3.618392176828947, "grad_norm": 0.05035822525168887, "learning_rate": 3.836699435566301e-05, "loss": 0.839, "step": 206100 }, { "epoch": 3.6185677417089486, "grad_norm": 0.05599632016160772, "learning_rate": 3.835836476189459e-05, "loss": 0.8363, "step": 206110 }, { "epoch": 3.61874330658895, "grad_norm": 0.06123110149596059, "learning_rate": 3.834973596180777e-05, "loss": 0.8424, "step": 206120 }, { "epoch": 3.618918871468951, "grad_norm": 0.055097016723922684, "learning_rate": 3.83411079555118e-05, "loss": 0.8412, "step": 206130 }, { "epoch": 3.619094436348953, "grad_norm": 0.053104643558551545, "learning_rate": 3.833248074311592e-05, "loss": 0.8312, "step": 206140 }, { "epoch": 3.619270001228954, "grad_norm": 0.06708783452971621, "learning_rate": 3.8323854324729604e-05, "loss": 0.841, "step": 206150 }, { "epoch": 3.6194455661089555, "grad_norm": 0.06694328705197654, "learning_rate": 3.8315228700462026e-05, "loss": 0.8424, "step": 206160 }, { "epoch": 3.619621130988957, "grad_norm": 0.04916098478172704, "learning_rate": 3.83066038704225e-05, "loss": 0.8419, "step": 206170 }, { "epoch": 3.6197966958689585, "grad_norm": 0.055964557594899496, "learning_rate": 3.829797983472037e-05, "loss": 0.8372, "step": 206180 }, { "epoch": 3.61997226074896, "grad_norm": 0.04575873824684855, "learning_rate": 3.828935659346482e-05, "loss": 0.8427, "step": 206190 }, { "epoch": 3.620147825628961, "grad_norm": 0.05713333562760868, "learning_rate": 3.8280734146765155e-05, "loss": 0.8419, "step": 206200 }, { "epoch": 3.6203233905089625, "grad_norm": 0.0725651423546486, "learning_rate": 3.827211249473055e-05, "loss": 0.8405, "step": 206210 }, { "epoch": 3.620498955388964, "grad_norm": 0.05260416696395593, "learning_rate": 3.8263491637470375e-05, "loss": 0.8428, "step": 206220 }, { "epoch": 3.6206745202689654, "grad_norm": 0.06452169178070956, "learning_rate": 3.825487157509374e-05, "loss": 0.8324, "step": 206230 }, { "epoch": 3.620850085148967, "grad_norm": 0.048812875011157573, "learning_rate": 3.8246252307709914e-05, "loss": 0.8374, "step": 206240 }, { "epoch": 3.6210256500289684, "grad_norm": 0.05039713321673325, "learning_rate": 3.823763383542813e-05, "loss": 0.8354, "step": 206250 }, { "epoch": 3.62120121490897, "grad_norm": 0.043539241481947225, "learning_rate": 3.8229016158357526e-05, "loss": 0.8363, "step": 206260 }, { "epoch": 3.621376779788971, "grad_norm": 0.05835300006825358, "learning_rate": 3.822039927660734e-05, "loss": 0.843, "step": 206270 }, { "epoch": 3.6215523446689724, "grad_norm": 0.048539226699357306, "learning_rate": 3.821178319028667e-05, "loss": 0.8397, "step": 206280 }, { "epoch": 3.621727909548974, "grad_norm": 0.0505786266005709, "learning_rate": 3.82031678995048e-05, "loss": 0.8472, "step": 206290 }, { "epoch": 3.6219034744289753, "grad_norm": 0.05376126825107002, "learning_rate": 3.819455340437083e-05, "loss": 0.8367, "step": 206300 }, { "epoch": 3.622079039308977, "grad_norm": 0.043866455637275745, "learning_rate": 3.818593970499383e-05, "loss": 0.838, "step": 206310 }, { "epoch": 3.622254604188978, "grad_norm": 0.05829822814889147, "learning_rate": 3.8177326801483096e-05, "loss": 0.8405, "step": 206320 }, { "epoch": 3.6224301690689793, "grad_norm": 0.05149512205591227, "learning_rate": 3.816871469394763e-05, "loss": 0.8371, "step": 206330 }, { "epoch": 3.6226057339489808, "grad_norm": 0.05847530308499657, "learning_rate": 3.816010338249659e-05, "loss": 0.8371, "step": 206340 }, { "epoch": 3.6227812988289823, "grad_norm": 0.06004125718103159, "learning_rate": 3.815149286723912e-05, "loss": 0.8373, "step": 206350 }, { "epoch": 3.6229568637089837, "grad_norm": 0.04616018779920488, "learning_rate": 3.814288314828425e-05, "loss": 0.8366, "step": 206360 }, { "epoch": 3.623132428588985, "grad_norm": 0.06809861558078194, "learning_rate": 3.8134274225741125e-05, "loss": 0.8352, "step": 206370 }, { "epoch": 3.6233079934689867, "grad_norm": 0.0551047293064294, "learning_rate": 3.8125666099718716e-05, "loss": 0.8544, "step": 206380 }, { "epoch": 3.6234835583489877, "grad_norm": 0.06712575936336419, "learning_rate": 3.811705877032624e-05, "loss": 0.8382, "step": 206390 }, { "epoch": 3.623659123228989, "grad_norm": 0.06508140155927838, "learning_rate": 3.810845223767265e-05, "loss": 0.8477, "step": 206400 }, { "epoch": 3.6238346881089907, "grad_norm": 0.07290756342540618, "learning_rate": 3.809984650186703e-05, "loss": 0.839, "step": 206410 }, { "epoch": 3.624010252988992, "grad_norm": 0.04985690292632335, "learning_rate": 3.809124156301844e-05, "loss": 0.8385, "step": 206420 }, { "epoch": 3.6241858178689936, "grad_norm": 0.046799534293092464, "learning_rate": 3.808263742123584e-05, "loss": 0.8404, "step": 206430 }, { "epoch": 3.6243613827489947, "grad_norm": 0.05736580525830358, "learning_rate": 3.807403407662832e-05, "loss": 0.8396, "step": 206440 }, { "epoch": 3.624536947628996, "grad_norm": 0.06913738628558357, "learning_rate": 3.806543152930477e-05, "loss": 0.8383, "step": 206450 }, { "epoch": 3.6247125125089976, "grad_norm": 0.05665875881666919, "learning_rate": 3.8056829779374345e-05, "loss": 0.8429, "step": 206460 }, { "epoch": 3.624888077388999, "grad_norm": 0.052040673477495164, "learning_rate": 3.8048228826945936e-05, "loss": 0.8389, "step": 206470 }, { "epoch": 3.6250636422690006, "grad_norm": 0.05995725153018644, "learning_rate": 3.8039628672128474e-05, "loss": 0.8404, "step": 206480 }, { "epoch": 3.625239207149002, "grad_norm": 0.05239430104042522, "learning_rate": 3.803102931503105e-05, "loss": 0.839, "step": 206490 }, { "epoch": 3.6254147720290035, "grad_norm": 0.048386591402977734, "learning_rate": 3.802243075576252e-05, "loss": 0.8432, "step": 206500 }, { "epoch": 3.6255903369090046, "grad_norm": 0.07512464717994759, "learning_rate": 3.8013832994431845e-05, "loss": 0.8407, "step": 206510 }, { "epoch": 3.625765901789006, "grad_norm": 0.04993971159162274, "learning_rate": 3.8005236031148036e-05, "loss": 0.8395, "step": 206520 }, { "epoch": 3.6259414666690075, "grad_norm": 0.048844414580567354, "learning_rate": 3.7996639866019904e-05, "loss": 0.8349, "step": 206530 }, { "epoch": 3.626117031549009, "grad_norm": 0.04713639528947886, "learning_rate": 3.7988044499156465e-05, "loss": 0.8428, "step": 206540 }, { "epoch": 3.6262925964290105, "grad_norm": 0.05605772707236479, "learning_rate": 3.797944993066652e-05, "loss": 0.8408, "step": 206550 }, { "epoch": 3.6264681613090115, "grad_norm": 0.055607924171770774, "learning_rate": 3.797085616065907e-05, "loss": 0.8387, "step": 206560 }, { "epoch": 3.626643726189013, "grad_norm": 0.049416292077697835, "learning_rate": 3.796226318924293e-05, "loss": 0.8374, "step": 206570 }, { "epoch": 3.6268192910690145, "grad_norm": 0.049424556307928594, "learning_rate": 3.7953671016526985e-05, "loss": 0.8443, "step": 206580 }, { "epoch": 3.626994855949016, "grad_norm": 0.05525255381294665, "learning_rate": 3.794507964262016e-05, "loss": 0.8369, "step": 206590 }, { "epoch": 3.6271704208290174, "grad_norm": 0.06130387710275784, "learning_rate": 3.793648906763121e-05, "loss": 0.8382, "step": 206600 }, { "epoch": 3.627345985709019, "grad_norm": 0.05286148979032406, "learning_rate": 3.792789929166908e-05, "loss": 0.843, "step": 206610 }, { "epoch": 3.6275215505890204, "grad_norm": 0.04817752804282527, "learning_rate": 3.791931031484249e-05, "loss": 0.8348, "step": 206620 }, { "epoch": 3.6276971154690214, "grad_norm": 0.05607929932615158, "learning_rate": 3.791072213726038e-05, "loss": 0.8363, "step": 206630 }, { "epoch": 3.627872680349023, "grad_norm": 0.0661608615296563, "learning_rate": 3.7902134759031513e-05, "loss": 0.8384, "step": 206640 }, { "epoch": 3.6280482452290244, "grad_norm": 0.05787240988631175, "learning_rate": 3.789354818026462e-05, "loss": 0.8389, "step": 206650 }, { "epoch": 3.628223810109026, "grad_norm": 0.0675580752365929, "learning_rate": 3.788496240106865e-05, "loss": 0.8375, "step": 206660 }, { "epoch": 3.6283993749890273, "grad_norm": 0.04656848651336428, "learning_rate": 3.787637742155226e-05, "loss": 0.8444, "step": 206670 }, { "epoch": 3.6285749398690283, "grad_norm": 0.10675013013946438, "learning_rate": 3.7867793241824265e-05, "loss": 0.8371, "step": 206680 }, { "epoch": 3.6287505047490303, "grad_norm": 0.05241834011334658, "learning_rate": 3.7859209861993456e-05, "loss": 0.8408, "step": 206690 }, { "epoch": 3.6289260696290313, "grad_norm": 0.06895221611559156, "learning_rate": 3.7850627282168535e-05, "loss": 0.8474, "step": 206700 }, { "epoch": 3.629101634509033, "grad_norm": 0.043235798537422626, "learning_rate": 3.78420455024583e-05, "loss": 0.843, "step": 206710 }, { "epoch": 3.6292771993890343, "grad_norm": 0.04945948022728828, "learning_rate": 3.783346452297137e-05, "loss": 0.8376, "step": 206720 }, { "epoch": 3.6294527642690357, "grad_norm": 0.04940295692070118, "learning_rate": 3.7824884343816644e-05, "loss": 0.8366, "step": 206730 }, { "epoch": 3.629628329149037, "grad_norm": 0.04892515547978079, "learning_rate": 3.781630496510269e-05, "loss": 0.8378, "step": 206740 }, { "epoch": 3.6298038940290382, "grad_norm": 0.06207454861361658, "learning_rate": 3.780772638693826e-05, "loss": 0.8378, "step": 206750 }, { "epoch": 3.6299794589090397, "grad_norm": 0.06047101949372692, "learning_rate": 3.7799148609432094e-05, "loss": 0.839, "step": 206760 }, { "epoch": 3.630155023789041, "grad_norm": 0.043041360592303395, "learning_rate": 3.779057163269279e-05, "loss": 0.8324, "step": 206770 }, { "epoch": 3.6303305886690427, "grad_norm": 0.0445899312963184, "learning_rate": 3.778199545682909e-05, "loss": 0.8394, "step": 206780 }, { "epoch": 3.630506153549044, "grad_norm": 0.04825648469281864, "learning_rate": 3.7773420081949544e-05, "loss": 0.8402, "step": 206790 }, { "epoch": 3.630681718429045, "grad_norm": 0.047162866501062335, "learning_rate": 3.776484550816297e-05, "loss": 0.842, "step": 206800 }, { "epoch": 3.630857283309047, "grad_norm": 0.06095831120279771, "learning_rate": 3.775627173557791e-05, "loss": 0.8298, "step": 206810 }, { "epoch": 3.631032848189048, "grad_norm": 0.049419346917703985, "learning_rate": 3.774769876430293e-05, "loss": 0.8426, "step": 206820 }, { "epoch": 3.6312084130690496, "grad_norm": 0.045562119848599196, "learning_rate": 3.773912659444682e-05, "loss": 0.8419, "step": 206830 }, { "epoch": 3.631383977949051, "grad_norm": 0.05322112391395556, "learning_rate": 3.773055522611805e-05, "loss": 0.8382, "step": 206840 }, { "epoch": 3.6315595428290526, "grad_norm": 0.06718791561213731, "learning_rate": 3.772198465942528e-05, "loss": 0.8389, "step": 206850 }, { "epoch": 3.631735107709054, "grad_norm": 0.05563110962706107, "learning_rate": 3.771341489447712e-05, "loss": 0.8339, "step": 206860 }, { "epoch": 3.631910672589055, "grad_norm": 0.06366855094173428, "learning_rate": 3.770484593138209e-05, "loss": 0.8418, "step": 206870 }, { "epoch": 3.6320862374690566, "grad_norm": 0.06364165464547106, "learning_rate": 3.7696277770248825e-05, "loss": 0.8257, "step": 206880 }, { "epoch": 3.632261802349058, "grad_norm": 0.05314683981206108, "learning_rate": 3.76877104111858e-05, "loss": 0.8414, "step": 206890 }, { "epoch": 3.6324373672290595, "grad_norm": 0.0433636091864561, "learning_rate": 3.7679143854301656e-05, "loss": 0.8429, "step": 206900 }, { "epoch": 3.632612932109061, "grad_norm": 0.06528285726070457, "learning_rate": 3.767057809970488e-05, "loss": 0.8422, "step": 206910 }, { "epoch": 3.632788496989062, "grad_norm": 0.05502964623761306, "learning_rate": 3.766201314750402e-05, "loss": 0.8413, "step": 206920 }, { "epoch": 3.632964061869064, "grad_norm": 0.05621620277537167, "learning_rate": 3.7653448997807616e-05, "loss": 0.8376, "step": 206930 }, { "epoch": 3.633139626749065, "grad_norm": 0.0482714620927815, "learning_rate": 3.764488565072412e-05, "loss": 0.8387, "step": 206940 }, { "epoch": 3.6333151916290665, "grad_norm": 0.05576458963408952, "learning_rate": 3.7636323106362104e-05, "loss": 0.8433, "step": 206950 }, { "epoch": 3.633490756509068, "grad_norm": 0.06066924050985779, "learning_rate": 3.7627761364829944e-05, "loss": 0.8334, "step": 206960 }, { "epoch": 3.6336663213890694, "grad_norm": 0.05475117773966392, "learning_rate": 3.761920042623627e-05, "loss": 0.8428, "step": 206970 }, { "epoch": 3.633841886269071, "grad_norm": 0.057105337826052795, "learning_rate": 3.761064029068947e-05, "loss": 0.834, "step": 206980 }, { "epoch": 3.634017451149072, "grad_norm": 0.05980819069561032, "learning_rate": 3.760208095829793e-05, "loss": 0.8407, "step": 206990 }, { "epoch": 3.6341930160290734, "grad_norm": 0.04283269203300795, "learning_rate": 3.7593522429170255e-05, "loss": 0.8441, "step": 207000 }, { "epoch": 3.634368580909075, "grad_norm": 0.05278019825148156, "learning_rate": 3.758496470341475e-05, "loss": 0.8467, "step": 207010 }, { "epoch": 3.6345441457890764, "grad_norm": 0.04402144815741203, "learning_rate": 3.757640778113992e-05, "loss": 0.8409, "step": 207020 }, { "epoch": 3.634719710669078, "grad_norm": 0.07039610896902142, "learning_rate": 3.756785166245417e-05, "loss": 0.8337, "step": 207030 }, { "epoch": 3.6348952755490793, "grad_norm": 0.05430228694627302, "learning_rate": 3.755929634746588e-05, "loss": 0.838, "step": 207040 }, { "epoch": 3.635070840429081, "grad_norm": 0.06423375643508548, "learning_rate": 3.7550741836283474e-05, "loss": 0.8406, "step": 207050 }, { "epoch": 3.635246405309082, "grad_norm": 0.047374250098700235, "learning_rate": 3.754218812901529e-05, "loss": 0.8402, "step": 207060 }, { "epoch": 3.6354219701890833, "grad_norm": 0.05503793898805805, "learning_rate": 3.753363522576979e-05, "loss": 0.844, "step": 207070 }, { "epoch": 3.635597535069085, "grad_norm": 0.04998968803100997, "learning_rate": 3.7525083126655276e-05, "loss": 0.841, "step": 207080 }, { "epoch": 3.6357730999490863, "grad_norm": 0.062244397700354094, "learning_rate": 3.751653183178011e-05, "loss": 0.837, "step": 207090 }, { "epoch": 3.6359486648290877, "grad_norm": 0.04888387207477947, "learning_rate": 3.75079813412527e-05, "loss": 0.8461, "step": 207100 }, { "epoch": 3.6361242297090888, "grad_norm": 0.04129128475053713, "learning_rate": 3.749943165518129e-05, "loss": 0.8433, "step": 207110 }, { "epoch": 3.6362997945890903, "grad_norm": 0.07497712841007043, "learning_rate": 3.749088277367429e-05, "loss": 0.8447, "step": 207120 }, { "epoch": 3.6364753594690917, "grad_norm": 0.05086545998171429, "learning_rate": 3.748233469683991e-05, "loss": 0.8384, "step": 207130 }, { "epoch": 3.636650924349093, "grad_norm": 0.04836266065871575, "learning_rate": 3.747378742478659e-05, "loss": 0.8464, "step": 207140 }, { "epoch": 3.6368264892290947, "grad_norm": 0.05105571221958425, "learning_rate": 3.746524095762255e-05, "loss": 0.8325, "step": 207150 }, { "epoch": 3.637002054109096, "grad_norm": 0.061926528730138326, "learning_rate": 3.745669529545603e-05, "loss": 0.8295, "step": 207160 }, { "epoch": 3.6371776189890976, "grad_norm": 0.04662248523986968, "learning_rate": 3.7448150438395415e-05, "loss": 0.8403, "step": 207170 }, { "epoch": 3.6373531838690987, "grad_norm": 0.05603760566279675, "learning_rate": 3.7439606386548876e-05, "loss": 0.8377, "step": 207180 }, { "epoch": 3.6375287487491, "grad_norm": 0.05331352910409461, "learning_rate": 3.743106314002472e-05, "loss": 0.8435, "step": 207190 }, { "epoch": 3.6377043136291016, "grad_norm": 0.04891343311418888, "learning_rate": 3.74225206989312e-05, "loss": 0.8384, "step": 207200 }, { "epoch": 3.637879878509103, "grad_norm": 0.05659100900083427, "learning_rate": 3.7413979063376496e-05, "loss": 0.8437, "step": 207210 }, { "epoch": 3.6380554433891046, "grad_norm": 0.04514120848182934, "learning_rate": 3.74054382334689e-05, "loss": 0.8434, "step": 207220 }, { "epoch": 3.6382310082691056, "grad_norm": 0.052072440720698394, "learning_rate": 3.7396898209316514e-05, "loss": 0.8426, "step": 207230 }, { "epoch": 3.638406573149107, "grad_norm": 0.045519624334827466, "learning_rate": 3.73883589910277e-05, "loss": 0.8383, "step": 207240 }, { "epoch": 3.6385821380291086, "grad_norm": 0.07305785938916413, "learning_rate": 3.737982057871052e-05, "loss": 0.8459, "step": 207250 }, { "epoch": 3.63875770290911, "grad_norm": 0.0509833639454058, "learning_rate": 3.737128297247321e-05, "loss": 0.8403, "step": 207260 }, { "epoch": 3.6389332677891115, "grad_norm": 0.058771976698932374, "learning_rate": 3.736274617242397e-05, "loss": 0.8415, "step": 207270 }, { "epoch": 3.639108832669113, "grad_norm": 0.059192640781358545, "learning_rate": 3.7354210178670903e-05, "loss": 0.8383, "step": 207280 }, { "epoch": 3.6392843975491145, "grad_norm": 0.048446883583668915, "learning_rate": 3.734567499132223e-05, "loss": 0.8445, "step": 207290 }, { "epoch": 3.6394599624291155, "grad_norm": 0.04742567564614266, "learning_rate": 3.7337140610485974e-05, "loss": 0.8341, "step": 207300 }, { "epoch": 3.639635527309117, "grad_norm": 0.05951340477158117, "learning_rate": 3.732860703627042e-05, "loss": 0.8508, "step": 207310 }, { "epoch": 3.6398110921891185, "grad_norm": 0.04967577794841141, "learning_rate": 3.732007426878362e-05, "loss": 0.8454, "step": 207320 }, { "epoch": 3.63998665706912, "grad_norm": 0.06775774676177379, "learning_rate": 3.731154230813361e-05, "loss": 0.8418, "step": 207330 }, { "epoch": 3.6401622219491214, "grad_norm": 0.05306842214266816, "learning_rate": 3.730301115442864e-05, "loss": 0.8423, "step": 207340 }, { "epoch": 3.6403377868291225, "grad_norm": 0.04960942986136163, "learning_rate": 3.729448080777669e-05, "loss": 0.8428, "step": 207350 }, { "epoch": 3.6405133517091244, "grad_norm": 0.0718637261927499, "learning_rate": 3.728595126828588e-05, "loss": 0.8298, "step": 207360 }, { "epoch": 3.6406889165891254, "grad_norm": 0.055136435431068456, "learning_rate": 3.727742253606431e-05, "loss": 0.8449, "step": 207370 }, { "epoch": 3.640864481469127, "grad_norm": 0.05205667573311479, "learning_rate": 3.726889461121997e-05, "loss": 0.8444, "step": 207380 }, { "epoch": 3.6410400463491284, "grad_norm": 0.06799965277889375, "learning_rate": 3.7260367493860984e-05, "loss": 0.8354, "step": 207390 }, { "epoch": 3.64121561122913, "grad_norm": 0.050503526102781124, "learning_rate": 3.72518411840953e-05, "loss": 0.8421, "step": 207400 }, { "epoch": 3.6413911761091313, "grad_norm": 0.041889422020677344, "learning_rate": 3.724331568203107e-05, "loss": 0.8407, "step": 207410 }, { "epoch": 3.6415667409891324, "grad_norm": 0.04784572330528736, "learning_rate": 3.723479098777621e-05, "loss": 0.8379, "step": 207420 }, { "epoch": 3.641742305869134, "grad_norm": 0.0551437536951535, "learning_rate": 3.7226267101438764e-05, "loss": 0.8447, "step": 207430 }, { "epoch": 3.6419178707491353, "grad_norm": 0.04094951266987548, "learning_rate": 3.721774402312678e-05, "loss": 0.8437, "step": 207440 }, { "epoch": 3.642093435629137, "grad_norm": 0.04909469916016027, "learning_rate": 3.720922175294816e-05, "loss": 0.8438, "step": 207450 }, { "epoch": 3.6422690005091383, "grad_norm": 0.0516606072225462, "learning_rate": 3.720070029101096e-05, "loss": 0.84, "step": 207460 }, { "epoch": 3.6424445653891393, "grad_norm": 0.04894456276118399, "learning_rate": 3.719217963742305e-05, "loss": 0.8435, "step": 207470 }, { "epoch": 3.642620130269141, "grad_norm": 0.04570873504660865, "learning_rate": 3.718365979229253e-05, "loss": 0.8301, "step": 207480 }, { "epoch": 3.6427956951491423, "grad_norm": 0.054550617134065595, "learning_rate": 3.717514075572725e-05, "loss": 0.8399, "step": 207490 }, { "epoch": 3.6429712600291437, "grad_norm": 0.043413964279711004, "learning_rate": 3.7166622527835125e-05, "loss": 0.847, "step": 207500 }, { "epoch": 3.643146824909145, "grad_norm": 0.04419167820603915, "learning_rate": 3.7158105108724186e-05, "loss": 0.8394, "step": 207510 }, { "epoch": 3.6433223897891467, "grad_norm": 0.05765268571678897, "learning_rate": 3.714958849850225e-05, "loss": 0.8459, "step": 207520 }, { "epoch": 3.643497954669148, "grad_norm": 0.05791250316344679, "learning_rate": 3.71410726972773e-05, "loss": 0.8403, "step": 207530 }, { "epoch": 3.643673519549149, "grad_norm": 0.05018405305979568, "learning_rate": 3.7132557705157165e-05, "loss": 0.8425, "step": 207540 }, { "epoch": 3.6438490844291507, "grad_norm": 0.06458812747505101, "learning_rate": 3.712404352224976e-05, "loss": 0.8388, "step": 207550 }, { "epoch": 3.644024649309152, "grad_norm": 0.0457438418267871, "learning_rate": 3.7115530148663e-05, "loss": 0.8373, "step": 207560 }, { "epoch": 3.6442002141891536, "grad_norm": 0.06066990853705161, "learning_rate": 3.7107017584504656e-05, "loss": 0.8333, "step": 207570 }, { "epoch": 3.644375779069155, "grad_norm": 0.04579613667671043, "learning_rate": 3.709850582988271e-05, "loss": 0.8301, "step": 207580 }, { "epoch": 3.644551343949156, "grad_norm": 0.07007426837512568, "learning_rate": 3.7089994884904904e-05, "loss": 0.8356, "step": 207590 }, { "epoch": 3.644726908829158, "grad_norm": 0.05385638596104467, "learning_rate": 3.708148474967912e-05, "loss": 0.8351, "step": 207600 }, { "epoch": 3.644902473709159, "grad_norm": 0.08359913164077053, "learning_rate": 3.7072975424313193e-05, "loss": 0.8402, "step": 207610 }, { "epoch": 3.6450780385891606, "grad_norm": 0.06448518563558289, "learning_rate": 3.706446690891489e-05, "loss": 0.8438, "step": 207620 }, { "epoch": 3.645253603469162, "grad_norm": 0.049552692419663986, "learning_rate": 3.705595920359208e-05, "loss": 0.8471, "step": 207630 }, { "epoch": 3.6454291683491635, "grad_norm": 0.05717464503449682, "learning_rate": 3.704745230845245e-05, "loss": 0.8395, "step": 207640 }, { "epoch": 3.645604733229165, "grad_norm": 0.05321924917936218, "learning_rate": 3.703894622360394e-05, "loss": 0.8409, "step": 207650 }, { "epoch": 3.645780298109166, "grad_norm": 0.06124325868479975, "learning_rate": 3.703044094915422e-05, "loss": 0.8307, "step": 207660 }, { "epoch": 3.6459558629891675, "grad_norm": 0.05517198255433807, "learning_rate": 3.702193648521102e-05, "loss": 0.8443, "step": 207670 }, { "epoch": 3.646131427869169, "grad_norm": 0.05714146477168648, "learning_rate": 3.70134328318822e-05, "loss": 0.8433, "step": 207680 }, { "epoch": 3.6463069927491705, "grad_norm": 0.04551001661291158, "learning_rate": 3.700492998927543e-05, "loss": 0.8448, "step": 207690 }, { "epoch": 3.646482557629172, "grad_norm": 0.06249414452714749, "learning_rate": 3.699642795749848e-05, "loss": 0.8363, "step": 207700 }, { "epoch": 3.6466581225091734, "grad_norm": 0.07639409020859114, "learning_rate": 3.6987926736659035e-05, "loss": 0.8419, "step": 207710 }, { "epoch": 3.646833687389175, "grad_norm": 0.05342337842743454, "learning_rate": 3.697942632686481e-05, "loss": 0.8325, "step": 207720 }, { "epoch": 3.647009252269176, "grad_norm": 0.04933454452230972, "learning_rate": 3.6970926728223566e-05, "loss": 0.8416, "step": 207730 }, { "epoch": 3.6471848171491774, "grad_norm": 0.04728596775590516, "learning_rate": 3.6962427940842894e-05, "loss": 0.8358, "step": 207740 }, { "epoch": 3.647360382029179, "grad_norm": 0.0471377770321644, "learning_rate": 3.6953929964830576e-05, "loss": 0.8389, "step": 207750 }, { "epoch": 3.6475359469091804, "grad_norm": 0.058753210529965426, "learning_rate": 3.694543280029422e-05, "loss": 0.8378, "step": 207760 }, { "epoch": 3.647711511789182, "grad_norm": 0.05261366440297615, "learning_rate": 3.6936936447341505e-05, "loss": 0.8414, "step": 207770 }, { "epoch": 3.647887076669183, "grad_norm": 0.04715569162594859, "learning_rate": 3.692844090608012e-05, "loss": 0.8392, "step": 207780 }, { "epoch": 3.6480626415491844, "grad_norm": 0.044951555915934256, "learning_rate": 3.691994617661763e-05, "loss": 0.8448, "step": 207790 }, { "epoch": 3.648238206429186, "grad_norm": 0.0520457009921428, "learning_rate": 3.691145225906174e-05, "loss": 0.84, "step": 207800 }, { "epoch": 3.6484137713091873, "grad_norm": 0.05704691414645937, "learning_rate": 3.690295915351997e-05, "loss": 0.8464, "step": 207810 }, { "epoch": 3.648589336189189, "grad_norm": 0.05572295058819523, "learning_rate": 3.689446686010005e-05, "loss": 0.8444, "step": 207820 }, { "epoch": 3.6487649010691903, "grad_norm": 0.04966330813133378, "learning_rate": 3.688597537890952e-05, "loss": 0.8364, "step": 207830 }, { "epoch": 3.6489404659491917, "grad_norm": 0.05232230674040612, "learning_rate": 3.687748471005591e-05, "loss": 0.846, "step": 207840 }, { "epoch": 3.649116030829193, "grad_norm": 0.06356125630718655, "learning_rate": 3.686899485364692e-05, "loss": 0.8456, "step": 207850 }, { "epoch": 3.6492915957091943, "grad_norm": 0.045060897979728405, "learning_rate": 3.6860505809790024e-05, "loss": 0.8441, "step": 207860 }, { "epoch": 3.6494671605891957, "grad_norm": 0.05674898152878474, "learning_rate": 3.685201757859284e-05, "loss": 0.8381, "step": 207870 }, { "epoch": 3.649642725469197, "grad_norm": 0.04458809466164217, "learning_rate": 3.684353016016287e-05, "loss": 0.8408, "step": 207880 }, { "epoch": 3.6498182903491987, "grad_norm": 0.05381535285125529, "learning_rate": 3.683504355460764e-05, "loss": 0.8396, "step": 207890 }, { "epoch": 3.6499938552291997, "grad_norm": 0.038697282479788986, "learning_rate": 3.682655776203475e-05, "loss": 0.8521, "step": 207900 }, { "epoch": 3.650169420109201, "grad_norm": 0.0729200379942147, "learning_rate": 3.6818072782551625e-05, "loss": 0.8382, "step": 207910 }, { "epoch": 3.6503449849892027, "grad_norm": 0.06782085971985817, "learning_rate": 3.6809588616265856e-05, "loss": 0.8365, "step": 207920 }, { "epoch": 3.650520549869204, "grad_norm": 0.07162589155073502, "learning_rate": 3.6801105263284894e-05, "loss": 0.8475, "step": 207930 }, { "epoch": 3.6506961147492056, "grad_norm": 0.05038640687444504, "learning_rate": 3.6792622723716217e-05, "loss": 0.8452, "step": 207940 }, { "epoch": 3.650871679629207, "grad_norm": 0.057077505325780035, "learning_rate": 3.6784140997667344e-05, "loss": 0.8401, "step": 207950 }, { "epoch": 3.6510472445092086, "grad_norm": 0.07501665900011711, "learning_rate": 3.6775660085245685e-05, "loss": 0.8359, "step": 207960 }, { "epoch": 3.6512228093892096, "grad_norm": 0.05398682937908255, "learning_rate": 3.676717998655876e-05, "loss": 0.842, "step": 207970 }, { "epoch": 3.651398374269211, "grad_norm": 0.048477727575171756, "learning_rate": 3.6758700701713906e-05, "loss": 0.8326, "step": 207980 }, { "epoch": 3.6515739391492126, "grad_norm": 0.050545255402797806, "learning_rate": 3.675022223081869e-05, "loss": 0.8398, "step": 207990 }, { "epoch": 3.651749504029214, "grad_norm": 0.05535414293461249, "learning_rate": 3.674174457398047e-05, "loss": 0.8451, "step": 208000 }, { "epoch": 3.6519250689092155, "grad_norm": 0.06010722368631157, "learning_rate": 3.673326773130659e-05, "loss": 0.8522, "step": 208010 }, { "epoch": 3.6521006337892166, "grad_norm": 0.04745787697738982, "learning_rate": 3.6724791702904606e-05, "loss": 0.8428, "step": 208020 }, { "epoch": 3.652276198669218, "grad_norm": 0.05797526261801778, "learning_rate": 3.671631648888178e-05, "loss": 0.8385, "step": 208030 }, { "epoch": 3.6524517635492195, "grad_norm": 0.05117262950917112, "learning_rate": 3.670784208934559e-05, "loss": 0.84, "step": 208040 }, { "epoch": 3.652627328429221, "grad_norm": 0.06966015755943702, "learning_rate": 3.669936850440331e-05, "loss": 0.8374, "step": 208050 }, { "epoch": 3.6528028933092225, "grad_norm": 0.04258384030799303, "learning_rate": 3.6690895734162376e-05, "loss": 0.8332, "step": 208060 }, { "epoch": 3.652978458189224, "grad_norm": 0.04831305024540532, "learning_rate": 3.668242377873014e-05, "loss": 0.85, "step": 208070 }, { "epoch": 3.6531540230692254, "grad_norm": 0.055906127433624615, "learning_rate": 3.667395263821386e-05, "loss": 0.8394, "step": 208080 }, { "epoch": 3.6533295879492265, "grad_norm": 0.05358729994167299, "learning_rate": 3.666548231272099e-05, "loss": 0.8438, "step": 208090 }, { "epoch": 3.653505152829228, "grad_norm": 0.04477733904239037, "learning_rate": 3.665701280235876e-05, "loss": 0.8481, "step": 208100 }, { "epoch": 3.6536807177092294, "grad_norm": 0.07407789356933361, "learning_rate": 3.66485441072345e-05, "loss": 0.841, "step": 208110 }, { "epoch": 3.653856282589231, "grad_norm": 0.046454584462398286, "learning_rate": 3.664007622745556e-05, "loss": 0.8433, "step": 208120 }, { "epoch": 3.6540318474692324, "grad_norm": 0.05199743267115952, "learning_rate": 3.6631609163129155e-05, "loss": 0.8445, "step": 208130 }, { "epoch": 3.6542074123492334, "grad_norm": 0.05292160078304449, "learning_rate": 3.6623142914362634e-05, "loss": 0.8354, "step": 208140 }, { "epoch": 3.6543829772292353, "grad_norm": 0.04879994080188687, "learning_rate": 3.661467748126316e-05, "loss": 0.8373, "step": 208150 }, { "epoch": 3.6545585421092364, "grad_norm": 0.07496208931159937, "learning_rate": 3.660621286393813e-05, "loss": 0.8399, "step": 208160 }, { "epoch": 3.654734106989238, "grad_norm": 0.04452517595804591, "learning_rate": 3.659774906249472e-05, "loss": 0.8468, "step": 208170 }, { "epoch": 3.6549096718692393, "grad_norm": 0.05967222932536949, "learning_rate": 3.6589286077040114e-05, "loss": 0.8367, "step": 208180 }, { "epoch": 3.655085236749241, "grad_norm": 0.05381750289073436, "learning_rate": 3.6580823907681655e-05, "loss": 0.8436, "step": 208190 }, { "epoch": 3.6552608016292423, "grad_norm": 0.050067816937429684, "learning_rate": 3.6572362554526475e-05, "loss": 0.8388, "step": 208200 }, { "epoch": 3.6554363665092433, "grad_norm": 0.05188376114642477, "learning_rate": 3.6563902017681836e-05, "loss": 0.8354, "step": 208210 }, { "epoch": 3.655611931389245, "grad_norm": 0.04598389192530864, "learning_rate": 3.6555442297254884e-05, "loss": 0.8433, "step": 208220 }, { "epoch": 3.6557874962692463, "grad_norm": 0.044667311270740834, "learning_rate": 3.6546983393352825e-05, "loss": 0.8389, "step": 208230 }, { "epoch": 3.6559630611492477, "grad_norm": 0.0620876979661096, "learning_rate": 3.653852530608287e-05, "loss": 0.8375, "step": 208240 }, { "epoch": 3.656138626029249, "grad_norm": 0.07200496211781039, "learning_rate": 3.653006803555208e-05, "loss": 0.8369, "step": 208250 }, { "epoch": 3.6563141909092503, "grad_norm": 0.06310951323834026, "learning_rate": 3.6521611581867755e-05, "loss": 0.8415, "step": 208260 }, { "epoch": 3.656489755789252, "grad_norm": 0.04671926052530355, "learning_rate": 3.651315594513693e-05, "loss": 0.8453, "step": 208270 }, { "epoch": 3.656665320669253, "grad_norm": 0.04679221290179048, "learning_rate": 3.6504701125466795e-05, "loss": 0.8405, "step": 208280 }, { "epoch": 3.6568408855492547, "grad_norm": 0.044634406139705346, "learning_rate": 3.649624712296448e-05, "loss": 0.8427, "step": 208290 }, { "epoch": 3.657016450429256, "grad_norm": 0.06740093183323445, "learning_rate": 3.6487793937737044e-05, "loss": 0.8486, "step": 208300 }, { "epoch": 3.6571920153092576, "grad_norm": 0.06450459700017051, "learning_rate": 3.647934156989165e-05, "loss": 0.84, "step": 208310 }, { "epoch": 3.657367580189259, "grad_norm": 0.05818036734951797, "learning_rate": 3.6470890019535307e-05, "loss": 0.8468, "step": 208320 }, { "epoch": 3.65754314506926, "grad_norm": 0.053287390725955536, "learning_rate": 3.646243928677522e-05, "loss": 0.8353, "step": 208330 }, { "epoch": 3.6577187099492616, "grad_norm": 0.0512535714779883, "learning_rate": 3.645398937171839e-05, "loss": 0.8419, "step": 208340 }, { "epoch": 3.657894274829263, "grad_norm": 0.04573258798853829, "learning_rate": 3.644554027447182e-05, "loss": 0.841, "step": 208350 }, { "epoch": 3.6580698397092646, "grad_norm": 0.04223633725473875, "learning_rate": 3.643709199514269e-05, "loss": 0.8411, "step": 208360 }, { "epoch": 3.658245404589266, "grad_norm": 0.059951669324911624, "learning_rate": 3.6428644533837954e-05, "loss": 0.8352, "step": 208370 }, { "epoch": 3.658420969469267, "grad_norm": 0.06859793157145337, "learning_rate": 3.642019789066469e-05, "loss": 0.8371, "step": 208380 }, { "epoch": 3.658596534349269, "grad_norm": 0.05907623596167393, "learning_rate": 3.641175206572986e-05, "loss": 0.8376, "step": 208390 }, { "epoch": 3.65877209922927, "grad_norm": 0.0570502019717151, "learning_rate": 3.640330705914052e-05, "loss": 0.8421, "step": 208400 }, { "epoch": 3.6589476641092715, "grad_norm": 0.055690345786462966, "learning_rate": 3.63948628710037e-05, "loss": 0.8412, "step": 208410 }, { "epoch": 3.659123228989273, "grad_norm": 0.057166883906601754, "learning_rate": 3.6386419501426266e-05, "loss": 0.8386, "step": 208420 }, { "epoch": 3.6592987938692745, "grad_norm": 0.043737855240824686, "learning_rate": 3.637797695051535e-05, "loss": 0.8367, "step": 208430 }, { "epoch": 3.659474358749276, "grad_norm": 0.05115362610634701, "learning_rate": 3.636953521837781e-05, "loss": 0.8397, "step": 208440 }, { "epoch": 3.659649923629277, "grad_norm": 0.04828477445869321, "learning_rate": 3.636109430512066e-05, "loss": 0.8375, "step": 208450 }, { "epoch": 3.6598254885092785, "grad_norm": 0.05588431551675621, "learning_rate": 3.6352654210850864e-05, "loss": 0.8419, "step": 208460 }, { "epoch": 3.66000105338928, "grad_norm": 0.05148097592621106, "learning_rate": 3.63442149356753e-05, "loss": 0.8393, "step": 208470 }, { "epoch": 3.6601766182692814, "grad_norm": 0.08544739427833326, "learning_rate": 3.633577647970095e-05, "loss": 0.8382, "step": 208480 }, { "epoch": 3.660352183149283, "grad_norm": 0.05593346554677629, "learning_rate": 3.6327338843034624e-05, "loss": 0.836, "step": 208490 }, { "epoch": 3.6605277480292844, "grad_norm": 0.04577013254973488, "learning_rate": 3.63189020257834e-05, "loss": 0.8416, "step": 208500 }, { "epoch": 3.660703312909286, "grad_norm": 0.06254390144070464, "learning_rate": 3.631046602805407e-05, "loss": 0.846, "step": 208510 }, { "epoch": 3.660878877789287, "grad_norm": 0.0478976001704165, "learning_rate": 3.630203084995347e-05, "loss": 0.8385, "step": 208520 }, { "epoch": 3.6610544426692884, "grad_norm": 0.05192258910685251, "learning_rate": 3.6293596491588605e-05, "loss": 0.8396, "step": 208530 }, { "epoch": 3.66123000754929, "grad_norm": 0.05888999763278196, "learning_rate": 3.628516295306623e-05, "loss": 0.8372, "step": 208540 }, { "epoch": 3.6614055724292913, "grad_norm": 0.04941437295629919, "learning_rate": 3.62767302344933e-05, "loss": 0.8371, "step": 208550 }, { "epoch": 3.661581137309293, "grad_norm": 0.0573821011538737, "learning_rate": 3.6268298335976555e-05, "loss": 0.8442, "step": 208560 }, { "epoch": 3.661756702189294, "grad_norm": 0.05313222120162089, "learning_rate": 3.625986725762287e-05, "loss": 0.8379, "step": 208570 }, { "epoch": 3.6619322670692953, "grad_norm": 0.054818415688642756, "learning_rate": 3.6251436999539103e-05, "loss": 0.8465, "step": 208580 }, { "epoch": 3.662107831949297, "grad_norm": 0.06349366387508558, "learning_rate": 3.624300756183199e-05, "loss": 0.8396, "step": 208590 }, { "epoch": 3.6622833968292983, "grad_norm": 0.058323068151535755, "learning_rate": 3.623457894460844e-05, "loss": 0.8406, "step": 208600 }, { "epoch": 3.6624589617092997, "grad_norm": 0.04964005673222531, "learning_rate": 3.622615114797516e-05, "loss": 0.835, "step": 208610 }, { "epoch": 3.662634526589301, "grad_norm": 0.045485576671676936, "learning_rate": 3.621772417203895e-05, "loss": 0.8346, "step": 208620 }, { "epoch": 3.6628100914693027, "grad_norm": 0.06825594397715319, "learning_rate": 3.6209298016906615e-05, "loss": 0.8396, "step": 208630 }, { "epoch": 3.6629856563493037, "grad_norm": 0.05969809300166471, "learning_rate": 3.620087268268488e-05, "loss": 0.8409, "step": 208640 }, { "epoch": 3.663161221229305, "grad_norm": 0.05818269527656864, "learning_rate": 3.6192448169480526e-05, "loss": 0.8423, "step": 208650 }, { "epoch": 3.6633367861093067, "grad_norm": 0.059272105255753764, "learning_rate": 3.6184024477400214e-05, "loss": 0.8454, "step": 208660 }, { "epoch": 3.663512350989308, "grad_norm": 0.06905215194339684, "learning_rate": 3.617560160655079e-05, "loss": 0.8418, "step": 208670 }, { "epoch": 3.6636879158693096, "grad_norm": 0.04401353734941677, "learning_rate": 3.616717955703893e-05, "loss": 0.8399, "step": 208680 }, { "epoch": 3.6638634807493107, "grad_norm": 0.049347951637373474, "learning_rate": 3.6158758328971254e-05, "loss": 0.8382, "step": 208690 }, { "epoch": 3.664039045629312, "grad_norm": 0.05106824972695485, "learning_rate": 3.615033792245461e-05, "loss": 0.8397, "step": 208700 }, { "epoch": 3.6642146105093136, "grad_norm": 0.061346775547312035, "learning_rate": 3.6141918337595575e-05, "loss": 0.8443, "step": 208710 }, { "epoch": 3.664390175389315, "grad_norm": 0.04813900473853539, "learning_rate": 3.613349957450091e-05, "loss": 0.8466, "step": 208720 }, { "epoch": 3.6645657402693166, "grad_norm": 0.057897858187791515, "learning_rate": 3.612508163327718e-05, "loss": 0.8392, "step": 208730 }, { "epoch": 3.664741305149318, "grad_norm": 0.06286048869780683, "learning_rate": 3.611666451403112e-05, "loss": 0.8402, "step": 208740 }, { "epoch": 3.6649168700293195, "grad_norm": 0.049828994675352706, "learning_rate": 3.6108248216869375e-05, "loss": 0.8438, "step": 208750 }, { "epoch": 3.6650924349093206, "grad_norm": 0.04430303371893461, "learning_rate": 3.60998327418985e-05, "loss": 0.8435, "step": 208760 }, { "epoch": 3.665267999789322, "grad_norm": 0.04963174857208082, "learning_rate": 3.609141808922526e-05, "loss": 0.8457, "step": 208770 }, { "epoch": 3.6654435646693235, "grad_norm": 0.08154664489137695, "learning_rate": 3.6083004258956143e-05, "loss": 0.8354, "step": 208780 }, { "epoch": 3.665619129549325, "grad_norm": 0.05066194203693672, "learning_rate": 3.607459125119781e-05, "loss": 0.8338, "step": 208790 }, { "epoch": 3.6657946944293265, "grad_norm": 0.05051109872065273, "learning_rate": 3.606617906605688e-05, "loss": 0.8424, "step": 208800 }, { "epoch": 3.6659702593093275, "grad_norm": 0.05710826119669858, "learning_rate": 3.605776770363987e-05, "loss": 0.8419, "step": 208810 }, { "epoch": 3.6661458241893294, "grad_norm": 0.052896895039653204, "learning_rate": 3.604935716405343e-05, "loss": 0.8359, "step": 208820 }, { "epoch": 3.6663213890693305, "grad_norm": 0.05666591105481921, "learning_rate": 3.604094744740401e-05, "loss": 0.8445, "step": 208830 }, { "epoch": 3.666496953949332, "grad_norm": 0.07069487347034938, "learning_rate": 3.6032538553798315e-05, "loss": 0.8365, "step": 208840 }, { "epoch": 3.6666725188293334, "grad_norm": 0.05031799261997027, "learning_rate": 3.602413048334279e-05, "loss": 0.8489, "step": 208850 }, { "epoch": 3.666848083709335, "grad_norm": 0.0462364968655923, "learning_rate": 3.601572323614392e-05, "loss": 0.8412, "step": 208860 }, { "epoch": 3.6670236485893364, "grad_norm": 0.05956316205802832, "learning_rate": 3.6007316812308364e-05, "loss": 0.8386, "step": 208870 }, { "epoch": 3.6671992134693374, "grad_norm": 0.048050309341962624, "learning_rate": 3.599891121194252e-05, "loss": 0.8448, "step": 208880 }, { "epoch": 3.667374778349339, "grad_norm": 0.06247629596303087, "learning_rate": 3.599050643515297e-05, "loss": 0.8365, "step": 208890 }, { "epoch": 3.6675503432293404, "grad_norm": 0.09732977738862077, "learning_rate": 3.5982102482046126e-05, "loss": 0.8412, "step": 208900 }, { "epoch": 3.667725908109342, "grad_norm": 0.04642005973541344, "learning_rate": 3.59736993527285e-05, "loss": 0.8417, "step": 208910 }, { "epoch": 3.6679014729893433, "grad_norm": 0.07454068649892699, "learning_rate": 3.59652970473066e-05, "loss": 0.8388, "step": 208920 }, { "epoch": 3.6680770378693444, "grad_norm": 0.05385261054386089, "learning_rate": 3.595689556588679e-05, "loss": 0.8479, "step": 208930 }, { "epoch": 3.6682526027493463, "grad_norm": 0.0510012981938576, "learning_rate": 3.594849490857564e-05, "loss": 0.8447, "step": 208940 }, { "epoch": 3.6684281676293473, "grad_norm": 0.05613786628140446, "learning_rate": 3.59400950754795e-05, "loss": 0.8383, "step": 208950 }, { "epoch": 3.668603732509349, "grad_norm": 0.0640982903858166, "learning_rate": 3.593169606670481e-05, "loss": 0.8447, "step": 208960 }, { "epoch": 3.6687792973893503, "grad_norm": 0.06008124631737059, "learning_rate": 3.5923297882358044e-05, "loss": 0.8372, "step": 208970 }, { "epoch": 3.6689548622693517, "grad_norm": 0.05550561540715296, "learning_rate": 3.591490052254553e-05, "loss": 0.8425, "step": 208980 }, { "epoch": 3.6691304271493532, "grad_norm": 0.06694942016244387, "learning_rate": 3.590650398737374e-05, "loss": 0.8409, "step": 208990 }, { "epoch": 3.6693059920293543, "grad_norm": 0.05095930210256873, "learning_rate": 3.5898108276948945e-05, "loss": 0.8311, "step": 209000 }, { "epoch": 3.6694815569093557, "grad_norm": 0.04966062436683267, "learning_rate": 3.5889713391377664e-05, "loss": 0.8494, "step": 209010 }, { "epoch": 3.669657121789357, "grad_norm": 0.06506234333678754, "learning_rate": 3.5881319330766186e-05, "loss": 0.8377, "step": 209020 }, { "epoch": 3.6698326866693587, "grad_norm": 0.049012715818810594, "learning_rate": 3.587292609522081e-05, "loss": 0.8421, "step": 209030 }, { "epoch": 3.67000825154936, "grad_norm": 0.051358718061736204, "learning_rate": 3.586453368484802e-05, "loss": 0.8384, "step": 209040 }, { "epoch": 3.670183816429361, "grad_norm": 0.049941857563207966, "learning_rate": 3.5856142099754025e-05, "loss": 0.8403, "step": 209050 }, { "epoch": 3.670359381309363, "grad_norm": 0.05451926941047834, "learning_rate": 3.5847751340045224e-05, "loss": 0.8391, "step": 209060 }, { "epoch": 3.670534946189364, "grad_norm": 0.05350046726385339, "learning_rate": 3.583936140582788e-05, "loss": 0.8386, "step": 209070 }, { "epoch": 3.6707105110693656, "grad_norm": 0.05782311890564411, "learning_rate": 3.583097229720831e-05, "loss": 0.8346, "step": 209080 }, { "epoch": 3.670886075949367, "grad_norm": 0.06678645120959097, "learning_rate": 3.5822584014292835e-05, "loss": 0.8373, "step": 209090 }, { "epoch": 3.6710616408293686, "grad_norm": 0.05190534905949794, "learning_rate": 3.5814196557187654e-05, "loss": 0.8362, "step": 209100 }, { "epoch": 3.67123720570937, "grad_norm": 0.04699605688914498, "learning_rate": 3.580580992599916e-05, "loss": 0.8437, "step": 209110 }, { "epoch": 3.671412770589371, "grad_norm": 0.05822669496876252, "learning_rate": 3.579742412083352e-05, "loss": 0.8395, "step": 209120 }, { "epoch": 3.6715883354693726, "grad_norm": 0.05436052138408788, "learning_rate": 3.5789039141797e-05, "loss": 0.8438, "step": 209130 }, { "epoch": 3.671763900349374, "grad_norm": 0.05217609259126647, "learning_rate": 3.57806549889959e-05, "loss": 0.8442, "step": 209140 }, { "epoch": 3.6719394652293755, "grad_norm": 0.054378250190861926, "learning_rate": 3.5772271662536354e-05, "loss": 0.8397, "step": 209150 }, { "epoch": 3.672115030109377, "grad_norm": 0.045113726797002394, "learning_rate": 3.576388916252467e-05, "loss": 0.839, "step": 209160 }, { "epoch": 3.6722905949893785, "grad_norm": 0.05008542468036958, "learning_rate": 3.575550748906695e-05, "loss": 0.8376, "step": 209170 }, { "epoch": 3.67246615986938, "grad_norm": 0.06853338596325818, "learning_rate": 3.5747126642269515e-05, "loss": 0.8378, "step": 209180 }, { "epoch": 3.672641724749381, "grad_norm": 0.06380437325818106, "learning_rate": 3.5738746622238483e-05, "loss": 0.835, "step": 209190 }, { "epoch": 3.6728172896293825, "grad_norm": 0.05683944121639501, "learning_rate": 3.573036742907998e-05, "loss": 0.8366, "step": 209200 }, { "epoch": 3.672992854509384, "grad_norm": 0.05891443277948683, "learning_rate": 3.572198906290029e-05, "loss": 0.8392, "step": 209210 }, { "epoch": 3.6731684193893854, "grad_norm": 0.05463926489464742, "learning_rate": 3.571361152380549e-05, "loss": 0.8365, "step": 209220 }, { "epoch": 3.673343984269387, "grad_norm": 0.04526191013166191, "learning_rate": 3.570523481190176e-05, "loss": 0.8344, "step": 209230 }, { "epoch": 3.673519549149388, "grad_norm": 0.053139806407865024, "learning_rate": 3.5696858927295206e-05, "loss": 0.8427, "step": 209240 }, { "epoch": 3.6736951140293894, "grad_norm": 0.052045039786561303, "learning_rate": 3.5688483870091944e-05, "loss": 0.8363, "step": 209250 }, { "epoch": 3.673870678909391, "grad_norm": 0.04639947446605266, "learning_rate": 3.568010964039816e-05, "loss": 0.8406, "step": 209260 }, { "epoch": 3.6740462437893924, "grad_norm": 0.05969403310274521, "learning_rate": 3.5671736238319826e-05, "loss": 0.8331, "step": 209270 }, { "epoch": 3.674221808669394, "grad_norm": 0.06438463920663688, "learning_rate": 3.566336366396318e-05, "loss": 0.8334, "step": 209280 }, { "epoch": 3.6743973735493953, "grad_norm": 0.06570155678892459, "learning_rate": 3.565499191743421e-05, "loss": 0.8482, "step": 209290 }, { "epoch": 3.674572938429397, "grad_norm": 0.04436215419674458, "learning_rate": 3.564662099883901e-05, "loss": 0.8427, "step": 209300 }, { "epoch": 3.674748503309398, "grad_norm": 0.0526405631930618, "learning_rate": 3.5638250908283686e-05, "loss": 0.8467, "step": 209310 }, { "epoch": 3.6749240681893993, "grad_norm": 0.0461042771129446, "learning_rate": 3.5629881645874214e-05, "loss": 0.8339, "step": 209320 }, { "epoch": 3.675099633069401, "grad_norm": 0.05455268622875595, "learning_rate": 3.562151321171671e-05, "loss": 0.8406, "step": 209330 }, { "epoch": 3.6752751979494023, "grad_norm": 0.05276252739433889, "learning_rate": 3.5613145605917087e-05, "loss": 0.8452, "step": 209340 }, { "epoch": 3.6754507628294038, "grad_norm": 0.04583324182886622, "learning_rate": 3.5604778828581515e-05, "loss": 0.8402, "step": 209350 }, { "epoch": 3.675626327709405, "grad_norm": 0.048238608692808174, "learning_rate": 3.5596412879815924e-05, "loss": 0.8423, "step": 209360 }, { "epoch": 3.6758018925894063, "grad_norm": 0.06673467932177382, "learning_rate": 3.558804775972625e-05, "loss": 0.8472, "step": 209370 }, { "epoch": 3.6759774574694077, "grad_norm": 0.042503016590045725, "learning_rate": 3.557968346841861e-05, "loss": 0.8423, "step": 209380 }, { "epoch": 3.676153022349409, "grad_norm": 0.04891614911561813, "learning_rate": 3.557132000599888e-05, "loss": 0.8367, "step": 209390 }, { "epoch": 3.6763285872294107, "grad_norm": 0.05563710852315078, "learning_rate": 3.5562957372573114e-05, "loss": 0.8321, "step": 209400 }, { "epoch": 3.676504152109412, "grad_norm": 0.06825643602309228, "learning_rate": 3.555459556824717e-05, "loss": 0.8405, "step": 209410 }, { "epoch": 3.6766797169894136, "grad_norm": 0.047447908560182585, "learning_rate": 3.554623459312705e-05, "loss": 0.8311, "step": 209420 }, { "epoch": 3.6768552818694147, "grad_norm": 0.05084135322878747, "learning_rate": 3.553787444731872e-05, "loss": 0.8327, "step": 209430 }, { "epoch": 3.677030846749416, "grad_norm": 0.04261076035161414, "learning_rate": 3.5529515130928e-05, "loss": 0.8396, "step": 209440 }, { "epoch": 3.6772064116294176, "grad_norm": 0.04892802909483145, "learning_rate": 3.552115664406093e-05, "loss": 0.8425, "step": 209450 }, { "epoch": 3.677381976509419, "grad_norm": 0.044897166050385115, "learning_rate": 3.551279898682332e-05, "loss": 0.8327, "step": 209460 }, { "epoch": 3.6775575413894206, "grad_norm": 0.07022123739631393, "learning_rate": 3.55044421593211e-05, "loss": 0.845, "step": 209470 }, { "epoch": 3.6777331062694216, "grad_norm": 0.05271928098046604, "learning_rate": 3.5496086161660184e-05, "loss": 0.8365, "step": 209480 }, { "epoch": 3.677908671149423, "grad_norm": 0.04283167719606166, "learning_rate": 3.5487730993946385e-05, "loss": 0.8441, "step": 209490 }, { "epoch": 3.6780842360294246, "grad_norm": 0.0632902305027325, "learning_rate": 3.5479376656285605e-05, "loss": 0.8346, "step": 209500 }, { "epoch": 3.678259800909426, "grad_norm": 0.058209295043646476, "learning_rate": 3.547102314878363e-05, "loss": 0.8359, "step": 209510 }, { "epoch": 3.6784353657894275, "grad_norm": 0.05802471911337925, "learning_rate": 3.5462670471546406e-05, "loss": 0.8434, "step": 209520 }, { "epoch": 3.678610930669429, "grad_norm": 0.04869962598769102, "learning_rate": 3.545431862467972e-05, "loss": 0.8432, "step": 209530 }, { "epoch": 3.6787864955494305, "grad_norm": 0.05736165577638583, "learning_rate": 3.54459676082893e-05, "loss": 0.8426, "step": 209540 }, { "epoch": 3.6789620604294315, "grad_norm": 0.04515809953465004, "learning_rate": 3.543761742248111e-05, "loss": 0.8398, "step": 209550 }, { "epoch": 3.679137625309433, "grad_norm": 0.05524635997288191, "learning_rate": 3.5429268067360826e-05, "loss": 0.846, "step": 209560 }, { "epoch": 3.6793131901894345, "grad_norm": 0.06824330425870924, "learning_rate": 3.542091954303432e-05, "loss": 0.8479, "step": 209570 }, { "epoch": 3.679488755069436, "grad_norm": 0.045433468916140515, "learning_rate": 3.5412571849607305e-05, "loss": 0.8428, "step": 209580 }, { "epoch": 3.6796643199494374, "grad_norm": 0.05241392468354748, "learning_rate": 3.540422498718557e-05, "loss": 0.8399, "step": 209590 }, { "epoch": 3.6798398848294385, "grad_norm": 0.06314386681889751, "learning_rate": 3.539587895587491e-05, "loss": 0.8414, "step": 209600 }, { "epoch": 3.6800154497094404, "grad_norm": 0.05183188333424722, "learning_rate": 3.5387533755780966e-05, "loss": 0.8353, "step": 209610 }, { "epoch": 3.6801910145894414, "grad_norm": 0.04438937766083191, "learning_rate": 3.537918938700961e-05, "loss": 0.8486, "step": 209620 }, { "epoch": 3.680366579469443, "grad_norm": 0.051266467520106274, "learning_rate": 3.537084584966648e-05, "loss": 0.8369, "step": 209630 }, { "epoch": 3.6805421443494444, "grad_norm": 0.05028991361303178, "learning_rate": 3.5362503143857305e-05, "loss": 0.8477, "step": 209640 }, { "epoch": 3.680717709229446, "grad_norm": 0.05787851043456389, "learning_rate": 3.535416126968782e-05, "loss": 0.8391, "step": 209650 }, { "epoch": 3.6808932741094473, "grad_norm": 0.04908856769294585, "learning_rate": 3.534582022726367e-05, "loss": 0.8475, "step": 209660 }, { "epoch": 3.6810688389894484, "grad_norm": 0.052496279195455416, "learning_rate": 3.533748001669059e-05, "loss": 0.8381, "step": 209670 }, { "epoch": 3.68124440386945, "grad_norm": 0.05543051244924703, "learning_rate": 3.5329140638074144e-05, "loss": 0.8395, "step": 209680 }, { "epoch": 3.6814199687494513, "grad_norm": 0.05555950331423421, "learning_rate": 3.5320802091520156e-05, "loss": 0.8393, "step": 209690 }, { "epoch": 3.681595533629453, "grad_norm": 0.04425555653795772, "learning_rate": 3.5312464377134175e-05, "loss": 0.8391, "step": 209700 }, { "epoch": 3.6817710985094543, "grad_norm": 0.06440033837124004, "learning_rate": 3.53041274950218e-05, "loss": 0.8426, "step": 209710 }, { "epoch": 3.6819466633894553, "grad_norm": 0.04031719910680272, "learning_rate": 3.52957914452888e-05, "loss": 0.842, "step": 209720 }, { "epoch": 3.6821222282694572, "grad_norm": 0.04651255561848049, "learning_rate": 3.528745622804067e-05, "loss": 0.8325, "step": 209730 }, { "epoch": 3.6822977931494583, "grad_norm": 0.05086678722693569, "learning_rate": 3.527912184338309e-05, "loss": 0.8455, "step": 209740 }, { "epoch": 3.6824733580294597, "grad_norm": 0.06145401315238536, "learning_rate": 3.527078829142161e-05, "loss": 0.8408, "step": 209750 }, { "epoch": 3.682648922909461, "grad_norm": 0.05632151939105467, "learning_rate": 3.526245557226183e-05, "loss": 0.842, "step": 209760 }, { "epoch": 3.6828244877894627, "grad_norm": 0.059086528572803235, "learning_rate": 3.5254123686009377e-05, "loss": 0.8397, "step": 209770 }, { "epoch": 3.683000052669464, "grad_norm": 0.04791571772472563, "learning_rate": 3.5245792632769714e-05, "loss": 0.8398, "step": 209780 }, { "epoch": 3.683175617549465, "grad_norm": 0.05337017898768397, "learning_rate": 3.523746241264852e-05, "loss": 0.8388, "step": 209790 }, { "epoch": 3.6833511824294667, "grad_norm": 0.04858693193663825, "learning_rate": 3.5229133025751246e-05, "loss": 0.8332, "step": 209800 }, { "epoch": 3.683526747309468, "grad_norm": 0.0540985641620731, "learning_rate": 3.522080447218346e-05, "loss": 0.8415, "step": 209810 }, { "epoch": 3.6837023121894696, "grad_norm": 0.048507781743368665, "learning_rate": 3.521247675205073e-05, "loss": 0.8386, "step": 209820 }, { "epoch": 3.683877877069471, "grad_norm": 0.05090566155797095, "learning_rate": 3.5204149865458494e-05, "loss": 0.8407, "step": 209830 }, { "epoch": 3.684053441949472, "grad_norm": 0.0480323333372483, "learning_rate": 3.519582381251231e-05, "loss": 0.843, "step": 209840 }, { "epoch": 3.684229006829474, "grad_norm": 0.04538673183861194, "learning_rate": 3.518749859331759e-05, "loss": 0.8374, "step": 209850 }, { "epoch": 3.684404571709475, "grad_norm": 0.05622929718241364, "learning_rate": 3.517917420797994e-05, "loss": 0.8316, "step": 209860 }, { "epoch": 3.6845801365894766, "grad_norm": 0.046967025773946035, "learning_rate": 3.517085065660478e-05, "loss": 0.8306, "step": 209870 }, { "epoch": 3.684755701469478, "grad_norm": 0.053799044547350565, "learning_rate": 3.5162527939297476e-05, "loss": 0.8348, "step": 209880 }, { "epoch": 3.6849312663494795, "grad_norm": 0.05875970514613972, "learning_rate": 3.515420605616363e-05, "loss": 0.8455, "step": 209890 }, { "epoch": 3.685106831229481, "grad_norm": 0.05031070524760897, "learning_rate": 3.5145885007308584e-05, "loss": 0.8408, "step": 209900 }, { "epoch": 3.685282396109482, "grad_norm": 0.04737942034684283, "learning_rate": 3.513756479283782e-05, "loss": 0.8363, "step": 209910 }, { "epoch": 3.6854579609894835, "grad_norm": 0.050476157987447985, "learning_rate": 3.512924541285671e-05, "loss": 0.8495, "step": 209920 }, { "epoch": 3.685633525869485, "grad_norm": 0.06345381311924793, "learning_rate": 3.5120926867470675e-05, "loss": 0.8394, "step": 209930 }, { "epoch": 3.6858090907494865, "grad_norm": 0.05495175195509386, "learning_rate": 3.511260915678517e-05, "loss": 0.8426, "step": 209940 }, { "epoch": 3.685984655629488, "grad_norm": 0.04184916256133415, "learning_rate": 3.510429228090545e-05, "loss": 0.8386, "step": 209950 }, { "epoch": 3.6861602205094894, "grad_norm": 0.05338183503916748, "learning_rate": 3.509597623993706e-05, "loss": 0.8445, "step": 209960 }, { "epoch": 3.686335785389491, "grad_norm": 0.046390924762384274, "learning_rate": 3.5087661033985245e-05, "loss": 0.8304, "step": 209970 }, { "epoch": 3.686511350269492, "grad_norm": 0.04778240944001487, "learning_rate": 3.50793466631554e-05, "loss": 0.8423, "step": 209980 }, { "epoch": 3.6866869151494934, "grad_norm": 0.05088295780055026, "learning_rate": 3.5071033127552876e-05, "loss": 0.8371, "step": 209990 }, { "epoch": 3.686862480029495, "grad_norm": 0.05113699384689641, "learning_rate": 3.506272042728297e-05, "loss": 0.8469, "step": 210000 }, { "epoch": 3.6870380449094964, "grad_norm": 0.05287865018254363, "learning_rate": 3.505440856245109e-05, "loss": 0.8315, "step": 210010 }, { "epoch": 3.687213609789498, "grad_norm": 0.04796640546190377, "learning_rate": 3.504609753316241e-05, "loss": 0.8471, "step": 210020 }, { "epoch": 3.687389174669499, "grad_norm": 0.0630256706357599, "learning_rate": 3.503778733952238e-05, "loss": 0.8456, "step": 210030 }, { "epoch": 3.6875647395495004, "grad_norm": 0.053769184082084656, "learning_rate": 3.5029477981636225e-05, "loss": 0.8371, "step": 210040 }, { "epoch": 3.687740304429502, "grad_norm": 0.05775731651042885, "learning_rate": 3.502116945960917e-05, "loss": 0.8372, "step": 210050 }, { "epoch": 3.6879158693095033, "grad_norm": 0.07039092712835965, "learning_rate": 3.50128617735466e-05, "loss": 0.8462, "step": 210060 }, { "epoch": 3.688091434189505, "grad_norm": 0.051025847947780895, "learning_rate": 3.500455492355368e-05, "loss": 0.8353, "step": 210070 }, { "epoch": 3.6882669990695063, "grad_norm": 0.043335999178118555, "learning_rate": 3.499624890973575e-05, "loss": 0.8412, "step": 210080 }, { "epoch": 3.6884425639495078, "grad_norm": 0.05819689852347623, "learning_rate": 3.4987943732197955e-05, "loss": 0.8374, "step": 210090 }, { "epoch": 3.688618128829509, "grad_norm": 0.07082453208490216, "learning_rate": 3.497963939104555e-05, "loss": 0.8364, "step": 210100 }, { "epoch": 3.6887936937095103, "grad_norm": 0.07024028896967274, "learning_rate": 3.497133588638381e-05, "loss": 0.8392, "step": 210110 }, { "epoch": 3.6889692585895117, "grad_norm": 0.05646461529926115, "learning_rate": 3.496303321831782e-05, "loss": 0.8403, "step": 210120 }, { "epoch": 3.6891448234695132, "grad_norm": 0.05228576079499608, "learning_rate": 3.4954731386952945e-05, "loss": 0.8459, "step": 210130 }, { "epoch": 3.6893203883495147, "grad_norm": 0.06494811645641756, "learning_rate": 3.494643039239423e-05, "loss": 0.846, "step": 210140 }, { "epoch": 3.6894959532295157, "grad_norm": 0.05921939491000753, "learning_rate": 3.493813023474692e-05, "loss": 0.8462, "step": 210150 }, { "epoch": 3.689671518109517, "grad_norm": 0.06366318737305707, "learning_rate": 3.492983091411617e-05, "loss": 0.838, "step": 210160 }, { "epoch": 3.6898470829895187, "grad_norm": 0.05121097759929135, "learning_rate": 3.4921532430607103e-05, "loss": 0.8379, "step": 210170 }, { "epoch": 3.69002264786952, "grad_norm": 0.06804341002830244, "learning_rate": 3.491323478432492e-05, "loss": 0.8382, "step": 210180 }, { "epoch": 3.6901982127495216, "grad_norm": 0.047306282675720845, "learning_rate": 3.490493797537464e-05, "loss": 0.8477, "step": 210190 }, { "epoch": 3.690373777629523, "grad_norm": 0.04951966913731846, "learning_rate": 3.489664200386154e-05, "loss": 0.8371, "step": 210200 }, { "epoch": 3.6905493425095246, "grad_norm": 0.05083679948295019, "learning_rate": 3.4888346869890655e-05, "loss": 0.8411, "step": 210210 }, { "epoch": 3.6907249073895256, "grad_norm": 0.05708556933009567, "learning_rate": 3.488005257356701e-05, "loss": 0.8488, "step": 210220 }, { "epoch": 3.690900472269527, "grad_norm": 0.0471508940421651, "learning_rate": 3.487175911499584e-05, "loss": 0.8388, "step": 210230 }, { "epoch": 3.6910760371495286, "grad_norm": 0.054092171321633555, "learning_rate": 3.486346649428213e-05, "loss": 0.8391, "step": 210240 }, { "epoch": 3.69125160202953, "grad_norm": 0.05584477477393784, "learning_rate": 3.4855174711531e-05, "loss": 0.8417, "step": 210250 }, { "epoch": 3.6914271669095315, "grad_norm": 0.06369450294772958, "learning_rate": 3.484688376684745e-05, "loss": 0.8387, "step": 210260 }, { "epoch": 3.6916027317895326, "grad_norm": 0.05002187629833076, "learning_rate": 3.483859366033657e-05, "loss": 0.8404, "step": 210270 }, { "epoch": 3.6917782966695345, "grad_norm": 0.04486636271093967, "learning_rate": 3.4830304392103405e-05, "loss": 0.8361, "step": 210280 }, { "epoch": 3.6919538615495355, "grad_norm": 0.058453583012730286, "learning_rate": 3.4822015962252915e-05, "loss": 0.8369, "step": 210290 }, { "epoch": 3.692129426429537, "grad_norm": 0.06041372303009075, "learning_rate": 3.481372837089023e-05, "loss": 0.8401, "step": 210300 }, { "epoch": 3.6923049913095385, "grad_norm": 0.04646546655286257, "learning_rate": 3.480544161812025e-05, "loss": 0.8412, "step": 210310 }, { "epoch": 3.69248055618954, "grad_norm": 0.04295704181718986, "learning_rate": 3.4797155704048015e-05, "loss": 0.8355, "step": 210320 }, { "epoch": 3.6926561210695414, "grad_norm": 0.049841448171084574, "learning_rate": 3.478887062877853e-05, "loss": 0.8454, "step": 210330 }, { "epoch": 3.6928316859495425, "grad_norm": 0.05030247452886533, "learning_rate": 3.4780586392416716e-05, "loss": 0.8341, "step": 210340 }, { "epoch": 3.693007250829544, "grad_norm": 0.06628950647410442, "learning_rate": 3.47723029950676e-05, "loss": 0.8378, "step": 210350 }, { "epoch": 3.6931828157095454, "grad_norm": 0.07876878824639971, "learning_rate": 3.476402043683603e-05, "loss": 0.8407, "step": 210360 }, { "epoch": 3.693358380589547, "grad_norm": 0.04843939911863175, "learning_rate": 3.475573871782708e-05, "loss": 0.8356, "step": 210370 }, { "epoch": 3.6935339454695484, "grad_norm": 0.04275820323819817, "learning_rate": 3.474745783814561e-05, "loss": 0.8387, "step": 210380 }, { "epoch": 3.6937095103495494, "grad_norm": 0.04903354967694946, "learning_rate": 3.4739177797896496e-05, "loss": 0.8422, "step": 210390 }, { "epoch": 3.6938850752295513, "grad_norm": 0.076001576749364, "learning_rate": 3.473089859718475e-05, "loss": 0.8348, "step": 210400 }, { "epoch": 3.6940606401095524, "grad_norm": 0.05416793407229062, "learning_rate": 3.4722620236115184e-05, "loss": 0.8384, "step": 210410 }, { "epoch": 3.694236204989554, "grad_norm": 0.07690899144676797, "learning_rate": 3.4714342714792756e-05, "loss": 0.8416, "step": 210420 }, { "epoch": 3.6944117698695553, "grad_norm": 0.050007872098113346, "learning_rate": 3.470606603332229e-05, "loss": 0.8369, "step": 210430 }, { "epoch": 3.694587334749557, "grad_norm": 0.054861575066963944, "learning_rate": 3.4697790191808664e-05, "loss": 0.8376, "step": 210440 }, { "epoch": 3.6947628996295583, "grad_norm": 0.04521762132544243, "learning_rate": 3.468951519035677e-05, "loss": 0.8369, "step": 210450 }, { "epoch": 3.6949384645095593, "grad_norm": 0.04477505684011138, "learning_rate": 3.468124102907137e-05, "loss": 0.8411, "step": 210460 }, { "epoch": 3.695114029389561, "grad_norm": 0.051997585945196885, "learning_rate": 3.467296770805743e-05, "loss": 0.8373, "step": 210470 }, { "epoch": 3.6952895942695623, "grad_norm": 0.05242067797721026, "learning_rate": 3.4664695227419646e-05, "loss": 0.8323, "step": 210480 }, { "epoch": 3.6954651591495637, "grad_norm": 0.05795709326077588, "learning_rate": 3.4656423587262896e-05, "loss": 0.8398, "step": 210490 }, { "epoch": 3.6956407240295652, "grad_norm": 0.06602683613047441, "learning_rate": 3.4648152787692015e-05, "loss": 0.8307, "step": 210500 }, { "epoch": 3.6958162889095663, "grad_norm": 0.05080485472683296, "learning_rate": 3.4639882828811714e-05, "loss": 0.8419, "step": 210510 }, { "epoch": 3.695991853789568, "grad_norm": 0.050030361585325776, "learning_rate": 3.4631613710726845e-05, "loss": 0.8414, "step": 210520 }, { "epoch": 3.696167418669569, "grad_norm": 0.044574211329613776, "learning_rate": 3.4623345433542085e-05, "loss": 0.8392, "step": 210530 }, { "epoch": 3.6963429835495707, "grad_norm": 0.05010340731938054, "learning_rate": 3.461507799736233e-05, "loss": 0.8358, "step": 210540 }, { "epoch": 3.696518548429572, "grad_norm": 0.05059269616559478, "learning_rate": 3.460681140229226e-05, "loss": 0.8441, "step": 210550 }, { "epoch": 3.6966941133095736, "grad_norm": 0.06341830589343718, "learning_rate": 3.459854564843654e-05, "loss": 0.8418, "step": 210560 }, { "epoch": 3.696869678189575, "grad_norm": 0.05779893844511105, "learning_rate": 3.459028073590005e-05, "loss": 0.8337, "step": 210570 }, { "epoch": 3.697045243069576, "grad_norm": 0.05208090540987079, "learning_rate": 3.45820166647874e-05, "loss": 0.8401, "step": 210580 }, { "epoch": 3.6972208079495776, "grad_norm": 0.058700088959292074, "learning_rate": 3.457375343520335e-05, "loss": 0.8429, "step": 210590 }, { "epoch": 3.697396372829579, "grad_norm": 0.046987212520536586, "learning_rate": 3.456549104725254e-05, "loss": 0.8386, "step": 210600 }, { "epoch": 3.6975719377095806, "grad_norm": 0.05542721051985387, "learning_rate": 3.4557229501039695e-05, "loss": 0.8349, "step": 210610 }, { "epoch": 3.697747502589582, "grad_norm": 0.04492566221969696, "learning_rate": 3.454896879666951e-05, "loss": 0.8432, "step": 210620 }, { "epoch": 3.6979230674695835, "grad_norm": 0.04865270248364846, "learning_rate": 3.454070893424657e-05, "loss": 0.8431, "step": 210630 }, { "epoch": 3.698098632349585, "grad_norm": 0.04187115414609849, "learning_rate": 3.4532449913875647e-05, "loss": 0.8328, "step": 210640 }, { "epoch": 3.698274197229586, "grad_norm": 0.053485016976013704, "learning_rate": 3.4524191735661294e-05, "loss": 0.8374, "step": 210650 }, { "epoch": 3.6984497621095875, "grad_norm": 0.05558163197089914, "learning_rate": 3.451593439970817e-05, "loss": 0.8411, "step": 210660 }, { "epoch": 3.698625326989589, "grad_norm": 0.06447959188721396, "learning_rate": 3.4507677906120934e-05, "loss": 0.8377, "step": 210670 }, { "epoch": 3.6988008918695905, "grad_norm": 0.11819902647307576, "learning_rate": 3.449942225500412e-05, "loss": 0.8425, "step": 210680 }, { "epoch": 3.698976456749592, "grad_norm": 0.04232464880583048, "learning_rate": 3.44911674464624e-05, "loss": 0.8373, "step": 210690 }, { "epoch": 3.699152021629593, "grad_norm": 0.047652104914990376, "learning_rate": 3.4482913480600284e-05, "loss": 0.8391, "step": 210700 }, { "epoch": 3.6993275865095945, "grad_norm": 0.0574735920581166, "learning_rate": 3.4474660357522464e-05, "loss": 0.8384, "step": 210710 }, { "epoch": 3.699503151389596, "grad_norm": 0.054226390927207796, "learning_rate": 3.4466408077333445e-05, "loss": 0.8417, "step": 210720 }, { "epoch": 3.6996787162695974, "grad_norm": 0.051028774715313766, "learning_rate": 3.445815664013771e-05, "loss": 0.8374, "step": 210730 }, { "epoch": 3.699854281149599, "grad_norm": 0.06745172834842866, "learning_rate": 3.444990604603997e-05, "loss": 0.838, "step": 210740 }, { "epoch": 3.7000298460296004, "grad_norm": 0.06632664223643357, "learning_rate": 3.4441656295144625e-05, "loss": 0.8326, "step": 210750 }, { "epoch": 3.700205410909602, "grad_norm": 0.04143125439049729, "learning_rate": 3.443340738755629e-05, "loss": 0.8412, "step": 210760 }, { "epoch": 3.700380975789603, "grad_norm": 0.04586338034728872, "learning_rate": 3.442515932337941e-05, "loss": 0.8304, "step": 210770 }, { "epoch": 3.7005565406696044, "grad_norm": 0.052968735159252274, "learning_rate": 3.441691210271852e-05, "loss": 0.8407, "step": 210780 }, { "epoch": 3.700732105549606, "grad_norm": 0.04880326610507707, "learning_rate": 3.440866572567815e-05, "loss": 0.8405, "step": 210790 }, { "epoch": 3.7009076704296073, "grad_norm": 0.07361666858173403, "learning_rate": 3.4400420192362665e-05, "loss": 0.8361, "step": 210800 }, { "epoch": 3.701083235309609, "grad_norm": 0.04995755587397494, "learning_rate": 3.4392175502876695e-05, "loss": 0.8366, "step": 210810 }, { "epoch": 3.70125880018961, "grad_norm": 0.060481327921479075, "learning_rate": 3.4383931657324604e-05, "loss": 0.8427, "step": 210820 }, { "epoch": 3.7014343650696113, "grad_norm": 0.04809399665385191, "learning_rate": 3.437568865581086e-05, "loss": 0.8417, "step": 210830 }, { "epoch": 3.701609929949613, "grad_norm": 0.057368890664269284, "learning_rate": 3.436744649843994e-05, "loss": 0.8527, "step": 210840 }, { "epoch": 3.7017854948296143, "grad_norm": 0.04920479967588251, "learning_rate": 3.435920518531621e-05, "loss": 0.8282, "step": 210850 }, { "epoch": 3.7019610597096158, "grad_norm": 0.05423460742587199, "learning_rate": 3.435096471654415e-05, "loss": 0.8341, "step": 210860 }, { "epoch": 3.7021366245896172, "grad_norm": 0.05096298622470347, "learning_rate": 3.4342725092228085e-05, "loss": 0.8401, "step": 210870 }, { "epoch": 3.7023121894696187, "grad_norm": 0.051575256071760264, "learning_rate": 3.4334486312472524e-05, "loss": 0.8427, "step": 210880 }, { "epoch": 3.7024877543496197, "grad_norm": 0.06132158790919768, "learning_rate": 3.4326248377381796e-05, "loss": 0.8449, "step": 210890 }, { "epoch": 3.702663319229621, "grad_norm": 0.049004046552582385, "learning_rate": 3.431801128706023e-05, "loss": 0.8572, "step": 210900 }, { "epoch": 3.7028388841096227, "grad_norm": 0.043836373906013346, "learning_rate": 3.4309775041612296e-05, "loss": 0.8395, "step": 210910 }, { "epoch": 3.703014448989624, "grad_norm": 0.08939843313707578, "learning_rate": 3.430153964114225e-05, "loss": 0.8501, "step": 210920 }, { "epoch": 3.7031900138696257, "grad_norm": 0.045758243512221346, "learning_rate": 3.429330508575453e-05, "loss": 0.8395, "step": 210930 }, { "epoch": 3.7033655787496267, "grad_norm": 0.04538770957028324, "learning_rate": 3.428507137555337e-05, "loss": 0.8372, "step": 210940 }, { "epoch": 3.703541143629628, "grad_norm": 0.048061062634587676, "learning_rate": 3.427683851064316e-05, "loss": 0.8394, "step": 210950 }, { "epoch": 3.7037167085096296, "grad_norm": 0.044043391927720485, "learning_rate": 3.4268606491128204e-05, "loss": 0.8399, "step": 210960 }, { "epoch": 3.703892273389631, "grad_norm": 0.06890499962125894, "learning_rate": 3.426037531711275e-05, "loss": 0.8359, "step": 210970 }, { "epoch": 3.7040678382696326, "grad_norm": 0.04994532190146109, "learning_rate": 3.425214498870118e-05, "loss": 0.8452, "step": 210980 }, { "epoch": 3.704243403149634, "grad_norm": 0.04684929595158864, "learning_rate": 3.4243915505997696e-05, "loss": 0.8454, "step": 210990 }, { "epoch": 3.7044189680296356, "grad_norm": 0.061148589777111385, "learning_rate": 3.42356868691066e-05, "loss": 0.8347, "step": 211000 }, { "epoch": 3.7045945329096366, "grad_norm": 0.07020875000480606, "learning_rate": 3.422745907813218e-05, "loss": 0.8455, "step": 211010 }, { "epoch": 3.704770097789638, "grad_norm": 0.04792958826868132, "learning_rate": 3.421923213317861e-05, "loss": 0.8358, "step": 211020 }, { "epoch": 3.7049456626696395, "grad_norm": 0.045625088916198514, "learning_rate": 3.421100603435021e-05, "loss": 0.8433, "step": 211030 }, { "epoch": 3.705121227549641, "grad_norm": 0.04884313080810672, "learning_rate": 3.420278078175113e-05, "loss": 0.8369, "step": 211040 }, { "epoch": 3.7052967924296425, "grad_norm": 0.04507021273343106, "learning_rate": 3.419455637548561e-05, "loss": 0.8397, "step": 211050 }, { "epoch": 3.7054723573096435, "grad_norm": 0.04451762240257929, "learning_rate": 3.418633281565791e-05, "loss": 0.8334, "step": 211060 }, { "epoch": 3.7056479221896454, "grad_norm": 0.06389012496252895, "learning_rate": 3.41781101023721e-05, "loss": 0.836, "step": 211070 }, { "epoch": 3.7058234870696465, "grad_norm": 0.05830000591891933, "learning_rate": 3.416988823573251e-05, "loss": 0.8421, "step": 211080 }, { "epoch": 3.705999051949648, "grad_norm": 0.057202211408835295, "learning_rate": 3.416166721584322e-05, "loss": 0.8387, "step": 211090 }, { "epoch": 3.7061746168296494, "grad_norm": 0.05437616102428297, "learning_rate": 3.415344704280844e-05, "loss": 0.8395, "step": 211100 }, { "epoch": 3.706350181709651, "grad_norm": 0.060318531340682484, "learning_rate": 3.414522771673226e-05, "loss": 0.8419, "step": 211110 }, { "epoch": 3.7065257465896524, "grad_norm": 0.051899113802191144, "learning_rate": 3.413700923771887e-05, "loss": 0.8321, "step": 211120 }, { "epoch": 3.7067013114696534, "grad_norm": 0.04934325190891691, "learning_rate": 3.4128791605872414e-05, "loss": 0.8423, "step": 211130 }, { "epoch": 3.706876876349655, "grad_norm": 0.060311204605374494, "learning_rate": 3.412057482129692e-05, "loss": 0.8415, "step": 211140 }, { "epoch": 3.7070524412296564, "grad_norm": 0.05346088066454043, "learning_rate": 3.411235888409662e-05, "loss": 0.8397, "step": 211150 }, { "epoch": 3.707228006109658, "grad_norm": 0.052525068915386185, "learning_rate": 3.4104143794375527e-05, "loss": 0.8352, "step": 211160 }, { "epoch": 3.7074035709896593, "grad_norm": 0.051372483834923516, "learning_rate": 3.4095929552237746e-05, "loss": 0.8311, "step": 211170 }, { "epoch": 3.7075791358696604, "grad_norm": 0.054321220518864975, "learning_rate": 3.40877161577874e-05, "loss": 0.838, "step": 211180 }, { "epoch": 3.7077547007496623, "grad_norm": 0.05503642962173508, "learning_rate": 3.407950361112847e-05, "loss": 0.8458, "step": 211190 }, { "epoch": 3.7079302656296633, "grad_norm": 0.05184289859693415, "learning_rate": 3.4071291912365095e-05, "loss": 0.8293, "step": 211200 }, { "epoch": 3.708105830509665, "grad_norm": 0.0509117364739118, "learning_rate": 3.406308106160125e-05, "loss": 0.839, "step": 211210 }, { "epoch": 3.7082813953896663, "grad_norm": 0.061126858707889684, "learning_rate": 3.405487105894099e-05, "loss": 0.8441, "step": 211220 }, { "epoch": 3.7084569602696678, "grad_norm": 0.061876622247823566, "learning_rate": 3.404666190448838e-05, "loss": 0.8421, "step": 211230 }, { "epoch": 3.7086325251496692, "grad_norm": 0.05267957443400821, "learning_rate": 3.403845359834732e-05, "loss": 0.841, "step": 211240 }, { "epoch": 3.7088080900296703, "grad_norm": 0.05032043330746887, "learning_rate": 3.403024614062196e-05, "loss": 0.8394, "step": 211250 }, { "epoch": 3.7089836549096717, "grad_norm": 0.0590903588063783, "learning_rate": 3.4022039531416186e-05, "loss": 0.84, "step": 211260 }, { "epoch": 3.7091592197896732, "grad_norm": 0.05217741931538674, "learning_rate": 3.401383377083403e-05, "loss": 0.8451, "step": 211270 }, { "epoch": 3.7093347846696747, "grad_norm": 0.08187858055814767, "learning_rate": 3.4005628858979414e-05, "loss": 0.8352, "step": 211280 }, { "epoch": 3.709510349549676, "grad_norm": 0.049278127190988355, "learning_rate": 3.399742479595631e-05, "loss": 0.8343, "step": 211290 }, { "epoch": 3.709685914429677, "grad_norm": 0.04620167727725859, "learning_rate": 3.398922158186872e-05, "loss": 0.8359, "step": 211300 }, { "epoch": 3.709861479309679, "grad_norm": 0.05426954197865697, "learning_rate": 3.398101921682046e-05, "loss": 0.8431, "step": 211310 }, { "epoch": 3.71003704418968, "grad_norm": 0.053349923282222045, "learning_rate": 3.39728177009156e-05, "loss": 0.8388, "step": 211320 }, { "epoch": 3.7102126090696816, "grad_norm": 0.06566209991368654, "learning_rate": 3.396461703425796e-05, "loss": 0.8337, "step": 211330 }, { "epoch": 3.710388173949683, "grad_norm": 0.04489939394555453, "learning_rate": 3.3956417216951456e-05, "loss": 0.8413, "step": 211340 }, { "epoch": 3.7105637388296846, "grad_norm": 0.06818272257671404, "learning_rate": 3.3948218249100034e-05, "loss": 0.843, "step": 211350 }, { "epoch": 3.710739303709686, "grad_norm": 0.05981376833956065, "learning_rate": 3.3940020130807506e-05, "loss": 0.8338, "step": 211360 }, { "epoch": 3.710914868589687, "grad_norm": 0.06200623992580753, "learning_rate": 3.393182286217781e-05, "loss": 0.8402, "step": 211370 }, { "epoch": 3.7110904334696886, "grad_norm": 0.05805804370033374, "learning_rate": 3.392362644331473e-05, "loss": 0.8425, "step": 211380 }, { "epoch": 3.71126599834969, "grad_norm": 0.04480896663990068, "learning_rate": 3.391543087432216e-05, "loss": 0.8423, "step": 211390 }, { "epoch": 3.7114415632296915, "grad_norm": 0.038222225515256315, "learning_rate": 3.390723615530398e-05, "loss": 0.8522, "step": 211400 }, { "epoch": 3.711617128109693, "grad_norm": 0.04136648659982268, "learning_rate": 3.38990422863639e-05, "loss": 0.8407, "step": 211410 }, { "epoch": 3.7117926929896945, "grad_norm": 0.05390155238474064, "learning_rate": 3.389084926760589e-05, "loss": 0.8397, "step": 211420 }, { "epoch": 3.711968257869696, "grad_norm": 0.059507628774379255, "learning_rate": 3.3882657099133635e-05, "loss": 0.8384, "step": 211430 }, { "epoch": 3.712143822749697, "grad_norm": 0.05345525621741854, "learning_rate": 3.387446578105101e-05, "loss": 0.8419, "step": 211440 }, { "epoch": 3.7123193876296985, "grad_norm": 0.06851458102237448, "learning_rate": 3.386627531346175e-05, "loss": 0.8413, "step": 211450 }, { "epoch": 3.7124949525097, "grad_norm": 0.05488877101736562, "learning_rate": 3.385808569646964e-05, "loss": 0.8463, "step": 211460 }, { "epoch": 3.7126705173897014, "grad_norm": 0.046601859496149436, "learning_rate": 3.384989693017849e-05, "loss": 0.8481, "step": 211470 }, { "epoch": 3.712846082269703, "grad_norm": 0.0840410132708092, "learning_rate": 3.384170901469194e-05, "loss": 0.8367, "step": 211480 }, { "epoch": 3.713021647149704, "grad_norm": 0.041225452991110384, "learning_rate": 3.383352195011388e-05, "loss": 0.8407, "step": 211490 }, { "epoch": 3.7131972120297054, "grad_norm": 0.051726006492100934, "learning_rate": 3.382533573654793e-05, "loss": 0.8349, "step": 211500 }, { "epoch": 3.713372776909707, "grad_norm": 0.07038255134504118, "learning_rate": 3.381715037409786e-05, "loss": 0.8338, "step": 211510 }, { "epoch": 3.7135483417897084, "grad_norm": 0.05068089597812051, "learning_rate": 3.38089658628674e-05, "loss": 0.8331, "step": 211520 }, { "epoch": 3.71372390666971, "grad_norm": 0.05258460607923388, "learning_rate": 3.380078220296018e-05, "loss": 0.8368, "step": 211530 }, { "epoch": 3.7138994715497113, "grad_norm": 0.05435118974023497, "learning_rate": 3.379259939447997e-05, "loss": 0.838, "step": 211540 }, { "epoch": 3.714075036429713, "grad_norm": 0.06047560336737044, "learning_rate": 3.378441743753037e-05, "loss": 0.8411, "step": 211550 }, { "epoch": 3.714250601309714, "grad_norm": 0.05882458176028383, "learning_rate": 3.377623633221508e-05, "loss": 0.8391, "step": 211560 }, { "epoch": 3.7144261661897153, "grad_norm": 0.06023305131532877, "learning_rate": 3.376805607863779e-05, "loss": 0.8392, "step": 211570 }, { "epoch": 3.714601731069717, "grad_norm": 0.05874294828857853, "learning_rate": 3.375987667690206e-05, "loss": 0.8352, "step": 211580 }, { "epoch": 3.7147772959497183, "grad_norm": 0.04875939714100281, "learning_rate": 3.3751698127111624e-05, "loss": 0.8366, "step": 211590 }, { "epoch": 3.7149528608297198, "grad_norm": 0.04939511399166342, "learning_rate": 3.3743520429370055e-05, "loss": 0.8402, "step": 211600 }, { "epoch": 3.715128425709721, "grad_norm": 0.047450908840441834, "learning_rate": 3.373534358378099e-05, "loss": 0.8409, "step": 211610 }, { "epoch": 3.7153039905897223, "grad_norm": 0.06928157392591927, "learning_rate": 3.372716759044798e-05, "loss": 0.8359, "step": 211620 }, { "epoch": 3.7154795554697237, "grad_norm": 0.06462175430331334, "learning_rate": 3.3718992449474644e-05, "loss": 0.8315, "step": 211630 }, { "epoch": 3.7156551203497252, "grad_norm": 0.054856528552533, "learning_rate": 3.371081816096461e-05, "loss": 0.8435, "step": 211640 }, { "epoch": 3.7158306852297267, "grad_norm": 0.05569156508195927, "learning_rate": 3.370264472502134e-05, "loss": 0.8446, "step": 211650 }, { "epoch": 3.716006250109728, "grad_norm": 0.12912569573053617, "learning_rate": 3.3694472141748515e-05, "loss": 0.8413, "step": 211660 }, { "epoch": 3.7161818149897297, "grad_norm": 0.05753633787610997, "learning_rate": 3.368630041124961e-05, "loss": 0.8417, "step": 211670 }, { "epoch": 3.7163573798697307, "grad_norm": 0.04501624909553479, "learning_rate": 3.367812953362816e-05, "loss": 0.8381, "step": 211680 }, { "epoch": 3.716532944749732, "grad_norm": 0.04304739130829031, "learning_rate": 3.3669959508987765e-05, "loss": 0.8392, "step": 211690 }, { "epoch": 3.7167085096297336, "grad_norm": 0.05331967262727377, "learning_rate": 3.3661790337431834e-05, "loss": 0.8407, "step": 211700 }, { "epoch": 3.716884074509735, "grad_norm": 0.05803085658349286, "learning_rate": 3.365362201906397e-05, "loss": 0.834, "step": 211710 }, { "epoch": 3.7170596393897366, "grad_norm": 0.052279928490086745, "learning_rate": 3.3645454553987583e-05, "loss": 0.837, "step": 211720 }, { "epoch": 3.7172352042697376, "grad_norm": 0.08247353683865608, "learning_rate": 3.363728794230619e-05, "loss": 0.8397, "step": 211730 }, { "epoch": 3.7174107691497396, "grad_norm": 0.060532099423884246, "learning_rate": 3.362912218412331e-05, "loss": 0.8379, "step": 211740 }, { "epoch": 3.7175863340297406, "grad_norm": 0.04702709830096514, "learning_rate": 3.3620957279542304e-05, "loss": 0.8451, "step": 211750 }, { "epoch": 3.717761898909742, "grad_norm": 0.05027737067295813, "learning_rate": 3.361279322866674e-05, "loss": 0.846, "step": 211760 }, { "epoch": 3.7179374637897435, "grad_norm": 0.051426700578984956, "learning_rate": 3.3604630031599976e-05, "loss": 0.8397, "step": 211770 }, { "epoch": 3.718113028669745, "grad_norm": 0.046740327697241756, "learning_rate": 3.359646768844549e-05, "loss": 0.8484, "step": 211780 }, { "epoch": 3.7182885935497465, "grad_norm": 0.049756238202780974, "learning_rate": 3.3588306199306644e-05, "loss": 0.8379, "step": 211790 }, { "epoch": 3.7184641584297475, "grad_norm": 0.04307002480812925, "learning_rate": 3.358014556428688e-05, "loss": 0.8419, "step": 211800 }, { "epoch": 3.718639723309749, "grad_norm": 0.051941765712855026, "learning_rate": 3.3571985783489615e-05, "loss": 0.8406, "step": 211810 }, { "epoch": 3.7188152881897505, "grad_norm": 0.05389997941886982, "learning_rate": 3.356382685701816e-05, "loss": 0.8364, "step": 211820 }, { "epoch": 3.718990853069752, "grad_norm": 0.04402519095147719, "learning_rate": 3.355566878497601e-05, "loss": 0.8352, "step": 211830 }, { "epoch": 3.7191664179497534, "grad_norm": 0.05157071093856418, "learning_rate": 3.3547511567466417e-05, "loss": 0.8444, "step": 211840 }, { "epoch": 3.7193419828297545, "grad_norm": 0.048695223281801814, "learning_rate": 3.3539355204592795e-05, "loss": 0.8434, "step": 211850 }, { "epoch": 3.7195175477097564, "grad_norm": 0.05392470736880503, "learning_rate": 3.353119969645849e-05, "loss": 0.8378, "step": 211860 }, { "epoch": 3.7196931125897574, "grad_norm": 0.047699334396605904, "learning_rate": 3.352304504316679e-05, "loss": 0.8325, "step": 211870 }, { "epoch": 3.719868677469759, "grad_norm": 0.05212614602894159, "learning_rate": 3.351489124482108e-05, "loss": 0.8428, "step": 211880 }, { "epoch": 3.7200442423497604, "grad_norm": 0.06159602156912912, "learning_rate": 3.350673830152458e-05, "loss": 0.844, "step": 211890 }, { "epoch": 3.720219807229762, "grad_norm": 0.05375782164352109, "learning_rate": 3.349858621338066e-05, "loss": 0.843, "step": 211900 }, { "epoch": 3.7203953721097633, "grad_norm": 0.05219486738874985, "learning_rate": 3.3490434980492604e-05, "loss": 0.8391, "step": 211910 }, { "epoch": 3.7205709369897644, "grad_norm": 0.05604292479382059, "learning_rate": 3.348228460296362e-05, "loss": 0.8395, "step": 211920 }, { "epoch": 3.720746501869766, "grad_norm": 0.0507674263825966, "learning_rate": 3.3474135080897096e-05, "loss": 0.848, "step": 211930 }, { "epoch": 3.7209220667497673, "grad_norm": 0.055901546566579516, "learning_rate": 3.346598641439618e-05, "loss": 0.8436, "step": 211940 }, { "epoch": 3.721097631629769, "grad_norm": 0.0451306061586399, "learning_rate": 3.345783860356421e-05, "loss": 0.8365, "step": 211950 }, { "epoch": 3.7212731965097703, "grad_norm": 0.05337101361749384, "learning_rate": 3.344969164850432e-05, "loss": 0.8414, "step": 211960 }, { "epoch": 3.7214487613897713, "grad_norm": 0.04662389819927643, "learning_rate": 3.344154554931979e-05, "loss": 0.8415, "step": 211970 }, { "epoch": 3.7216243262697732, "grad_norm": 0.048586630364780205, "learning_rate": 3.3433400306113856e-05, "loss": 0.8394, "step": 211980 }, { "epoch": 3.7217998911497743, "grad_norm": 0.04213945952742679, "learning_rate": 3.3425255918989626e-05, "loss": 0.829, "step": 211990 }, { "epoch": 3.7219754560297758, "grad_norm": 0.05333755891131583, "learning_rate": 3.341711238805041e-05, "loss": 0.8322, "step": 212000 }, { "epoch": 3.7221510209097772, "grad_norm": 0.06394852355850848, "learning_rate": 3.340896971339931e-05, "loss": 0.8394, "step": 212010 }, { "epoch": 3.7223265857897787, "grad_norm": 0.06813276325580744, "learning_rate": 3.3400827895139514e-05, "loss": 0.8384, "step": 212020 }, { "epoch": 3.72250215066978, "grad_norm": 0.05653496427436215, "learning_rate": 3.3392686933374225e-05, "loss": 0.8444, "step": 212030 }, { "epoch": 3.722677715549781, "grad_norm": 0.05836730445372674, "learning_rate": 3.3384546828206506e-05, "loss": 0.8367, "step": 212040 }, { "epoch": 3.7228532804297827, "grad_norm": 0.06029061575185375, "learning_rate": 3.337640757973958e-05, "loss": 0.8457, "step": 212050 }, { "epoch": 3.723028845309784, "grad_norm": 0.05196354596632216, "learning_rate": 3.336826918807648e-05, "loss": 0.8411, "step": 212060 }, { "epoch": 3.7232044101897857, "grad_norm": 0.058684666282881756, "learning_rate": 3.336013165332039e-05, "loss": 0.8418, "step": 212070 }, { "epoch": 3.723379975069787, "grad_norm": 0.060137666732620124, "learning_rate": 3.335199497557442e-05, "loss": 0.8375, "step": 212080 }, { "epoch": 3.7235555399497886, "grad_norm": 0.057179229427399676, "learning_rate": 3.334385915494156e-05, "loss": 0.8381, "step": 212090 }, { "epoch": 3.72373110482979, "grad_norm": 0.04438574810694763, "learning_rate": 3.333572419152505e-05, "loss": 0.8383, "step": 212100 }, { "epoch": 3.723906669709791, "grad_norm": 0.07130878496480113, "learning_rate": 3.3327590085427855e-05, "loss": 0.8389, "step": 212110 }, { "epoch": 3.7240822345897926, "grad_norm": 0.054191471894848055, "learning_rate": 3.331945683675308e-05, "loss": 0.844, "step": 212120 }, { "epoch": 3.724257799469794, "grad_norm": 0.07899086215703228, "learning_rate": 3.3311324445603745e-05, "loss": 0.8315, "step": 212130 }, { "epoch": 3.7244333643497955, "grad_norm": 0.04878875553823231, "learning_rate": 3.3303192912082883e-05, "loss": 0.8365, "step": 212140 }, { "epoch": 3.724608929229797, "grad_norm": 0.060380881831167764, "learning_rate": 3.329506223629358e-05, "loss": 0.8371, "step": 212150 }, { "epoch": 3.724784494109798, "grad_norm": 0.04862085145415083, "learning_rate": 3.328693241833875e-05, "loss": 0.8382, "step": 212160 }, { "epoch": 3.7249600589897995, "grad_norm": 0.11551659781587167, "learning_rate": 3.327880345832153e-05, "loss": 0.8305, "step": 212170 }, { "epoch": 3.725135623869801, "grad_norm": 0.06553234191613798, "learning_rate": 3.3270675356344814e-05, "loss": 0.8449, "step": 212180 }, { "epoch": 3.7253111887498025, "grad_norm": 0.055278431735996406, "learning_rate": 3.3262548112511624e-05, "loss": 0.849, "step": 212190 }, { "epoch": 3.725486753629804, "grad_norm": 0.06703463675007522, "learning_rate": 3.325442172692496e-05, "loss": 0.8399, "step": 212200 }, { "epoch": 3.7256623185098054, "grad_norm": 0.04959644594201424, "learning_rate": 3.324629619968773e-05, "loss": 0.8453, "step": 212210 }, { "epoch": 3.725837883389807, "grad_norm": 0.04748227768387074, "learning_rate": 3.323817153090294e-05, "loss": 0.8421, "step": 212220 }, { "epoch": 3.726013448269808, "grad_norm": 0.0506557650207054, "learning_rate": 3.3230047720673474e-05, "loss": 0.8329, "step": 212230 }, { "epoch": 3.7261890131498094, "grad_norm": 0.05808881295052908, "learning_rate": 3.3221924769102284e-05, "loss": 0.8439, "step": 212240 }, { "epoch": 3.726364578029811, "grad_norm": 0.05300734040378929, "learning_rate": 3.321380267629233e-05, "loss": 0.8385, "step": 212250 }, { "epoch": 3.7265401429098124, "grad_norm": 0.0694981988424773, "learning_rate": 3.3205681442346424e-05, "loss": 0.8304, "step": 212260 }, { "epoch": 3.726715707789814, "grad_norm": 0.04255067545850356, "learning_rate": 3.3197561067367595e-05, "loss": 0.844, "step": 212270 }, { "epoch": 3.726891272669815, "grad_norm": 0.05465040874152766, "learning_rate": 3.318944155145863e-05, "loss": 0.8445, "step": 212280 }, { "epoch": 3.7270668375498164, "grad_norm": 0.050188947139312406, "learning_rate": 3.318132289472247e-05, "loss": 0.8381, "step": 212290 }, { "epoch": 3.727242402429818, "grad_norm": 0.05792413368015231, "learning_rate": 3.31732050972619e-05, "loss": 0.832, "step": 212300 }, { "epoch": 3.7274179673098193, "grad_norm": 0.058238972373513886, "learning_rate": 3.316508815917983e-05, "loss": 0.8369, "step": 212310 }, { "epoch": 3.727593532189821, "grad_norm": 0.04409602404997595, "learning_rate": 3.315697208057914e-05, "loss": 0.8413, "step": 212320 }, { "epoch": 3.7277690970698223, "grad_norm": 0.0628565005293095, "learning_rate": 3.3148856861562534e-05, "loss": 0.8417, "step": 212330 }, { "epoch": 3.7279446619498238, "grad_norm": 0.05400111387220291, "learning_rate": 3.3140742502232974e-05, "loss": 0.8453, "step": 212340 }, { "epoch": 3.728120226829825, "grad_norm": 0.05913862315173438, "learning_rate": 3.313262900269319e-05, "loss": 0.8386, "step": 212350 }, { "epoch": 3.7282957917098263, "grad_norm": 0.06036711694021032, "learning_rate": 3.312451636304601e-05, "loss": 0.8405, "step": 212360 }, { "epoch": 3.7284713565898278, "grad_norm": 0.04807688011569783, "learning_rate": 3.311640458339424e-05, "loss": 0.8416, "step": 212370 }, { "epoch": 3.7286469214698292, "grad_norm": 0.045189318857777015, "learning_rate": 3.3108293663840595e-05, "loss": 0.8418, "step": 212380 }, { "epoch": 3.7288224863498307, "grad_norm": 0.061674477545877, "learning_rate": 3.310018360448792e-05, "loss": 0.8373, "step": 212390 }, { "epoch": 3.7289980512298317, "grad_norm": 0.04519026934853633, "learning_rate": 3.3092074405438903e-05, "loss": 0.8391, "step": 212400 }, { "epoch": 3.7291736161098332, "grad_norm": 0.05799034002010161, "learning_rate": 3.3083966066796304e-05, "loss": 0.8363, "step": 212410 }, { "epoch": 3.7293491809898347, "grad_norm": 0.05624267961572428, "learning_rate": 3.307585858866292e-05, "loss": 0.8484, "step": 212420 }, { "epoch": 3.729524745869836, "grad_norm": 0.05169199630375892, "learning_rate": 3.3067751971141354e-05, "loss": 0.834, "step": 212430 }, { "epoch": 3.7297003107498377, "grad_norm": 0.057516438588558746, "learning_rate": 3.3059646214334454e-05, "loss": 0.8338, "step": 212440 }, { "epoch": 3.729875875629839, "grad_norm": 0.07584227653420184, "learning_rate": 3.3051541318344817e-05, "loss": 0.8368, "step": 212450 }, { "epoch": 3.7300514405098406, "grad_norm": 0.062296055092585, "learning_rate": 3.3043437283275225e-05, "loss": 0.8411, "step": 212460 }, { "epoch": 3.7302270053898416, "grad_norm": 0.050438747538415415, "learning_rate": 3.303533410922826e-05, "loss": 0.8412, "step": 212470 }, { "epoch": 3.730402570269843, "grad_norm": 0.040833666917075684, "learning_rate": 3.302723179630664e-05, "loss": 0.8387, "step": 212480 }, { "epoch": 3.7305781351498446, "grad_norm": 0.05210734949061622, "learning_rate": 3.301913034461305e-05, "loss": 0.8385, "step": 212490 }, { "epoch": 3.730753700029846, "grad_norm": 0.05873540177572583, "learning_rate": 3.301102975425005e-05, "loss": 0.8431, "step": 212500 }, { "epoch": 3.7309292649098476, "grad_norm": 0.05704858192109554, "learning_rate": 3.3002930025320394e-05, "loss": 0.8428, "step": 212510 }, { "epoch": 3.7311048297898486, "grad_norm": 0.06337763740106835, "learning_rate": 3.2994831157926606e-05, "loss": 0.839, "step": 212520 }, { "epoch": 3.7312803946698505, "grad_norm": 0.045357381613857606, "learning_rate": 3.298673315217136e-05, "loss": 0.8408, "step": 212530 }, { "epoch": 3.7314559595498515, "grad_norm": 0.0621416034388573, "learning_rate": 3.297863600815725e-05, "loss": 0.8303, "step": 212540 }, { "epoch": 3.731631524429853, "grad_norm": 0.05617285597137504, "learning_rate": 3.2970539725986835e-05, "loss": 0.8438, "step": 212550 }, { "epoch": 3.7318070893098545, "grad_norm": 0.04628398008447487, "learning_rate": 3.2962444305762755e-05, "loss": 0.8402, "step": 212560 }, { "epoch": 3.731982654189856, "grad_norm": 0.04463173371420275, "learning_rate": 3.29543497475875e-05, "loss": 0.8427, "step": 212570 }, { "epoch": 3.7321582190698575, "grad_norm": 0.06288880794561229, "learning_rate": 3.2946256051563686e-05, "loss": 0.8435, "step": 212580 }, { "epoch": 3.7323337839498585, "grad_norm": 0.05146609206765846, "learning_rate": 3.293816321779388e-05, "loss": 0.8309, "step": 212590 }, { "epoch": 3.73250934882986, "grad_norm": 0.05535632597032615, "learning_rate": 3.293007124638053e-05, "loss": 0.8428, "step": 212600 }, { "epoch": 3.7326849137098614, "grad_norm": 0.04881312737481225, "learning_rate": 3.292198013742628e-05, "loss": 0.8389, "step": 212610 }, { "epoch": 3.732860478589863, "grad_norm": 0.0537407593086242, "learning_rate": 3.2913889891033564e-05, "loss": 0.8412, "step": 212620 }, { "epoch": 3.7330360434698644, "grad_norm": 0.05041473028290572, "learning_rate": 3.290580050730493e-05, "loss": 0.8384, "step": 212630 }, { "epoch": 3.7332116083498654, "grad_norm": 0.06015939528140917, "learning_rate": 3.2897711986342835e-05, "loss": 0.8417, "step": 212640 }, { "epoch": 3.7333871732298674, "grad_norm": 0.04964331812214613, "learning_rate": 3.288962432824979e-05, "loss": 0.8438, "step": 212650 }, { "epoch": 3.7335627381098684, "grad_norm": 0.05900943179909198, "learning_rate": 3.288153753312827e-05, "loss": 0.8333, "step": 212660 }, { "epoch": 3.73373830298987, "grad_norm": 0.04751677025590802, "learning_rate": 3.2873451601080663e-05, "loss": 0.836, "step": 212670 }, { "epoch": 3.7339138678698713, "grad_norm": 0.04407667864348086, "learning_rate": 3.286536653220954e-05, "loss": 0.841, "step": 212680 }, { "epoch": 3.734089432749873, "grad_norm": 0.045556471523579604, "learning_rate": 3.285728232661727e-05, "loss": 0.8498, "step": 212690 }, { "epoch": 3.7342649976298743, "grad_norm": 0.04230991139030103, "learning_rate": 3.2849198984406274e-05, "loss": 0.8447, "step": 212700 }, { "epoch": 3.7344405625098753, "grad_norm": 0.042535770334563724, "learning_rate": 3.2841116505679025e-05, "loss": 0.8356, "step": 212710 }, { "epoch": 3.734616127389877, "grad_norm": 0.058166415210747356, "learning_rate": 3.283303489053787e-05, "loss": 0.8342, "step": 212720 }, { "epoch": 3.7347916922698783, "grad_norm": 0.05469852175769484, "learning_rate": 3.282495413908525e-05, "loss": 0.8325, "step": 212730 }, { "epoch": 3.7349672571498798, "grad_norm": 0.05183615904564911, "learning_rate": 3.281687425142349e-05, "loss": 0.8391, "step": 212740 }, { "epoch": 3.7351428220298812, "grad_norm": 0.05339543169953448, "learning_rate": 3.280879522765501e-05, "loss": 0.8374, "step": 212750 }, { "epoch": 3.7353183869098823, "grad_norm": 0.0562632219609613, "learning_rate": 3.280071706788219e-05, "loss": 0.8425, "step": 212760 }, { "epoch": 3.735493951789884, "grad_norm": 0.046443974022541655, "learning_rate": 3.27926397722073e-05, "loss": 0.8324, "step": 212770 }, { "epoch": 3.7356695166698852, "grad_norm": 0.053582151914072756, "learning_rate": 3.278456334073278e-05, "loss": 0.8348, "step": 212780 }, { "epoch": 3.7358450815498867, "grad_norm": 0.07331862059097, "learning_rate": 3.277648777356091e-05, "loss": 0.8338, "step": 212790 }, { "epoch": 3.736020646429888, "grad_norm": 0.054479239002870036, "learning_rate": 3.276841307079402e-05, "loss": 0.8474, "step": 212800 }, { "epoch": 3.7361962113098897, "grad_norm": 0.05663536542361728, "learning_rate": 3.2760339232534395e-05, "loss": 0.8483, "step": 212810 }, { "epoch": 3.736371776189891, "grad_norm": 0.05167780158042225, "learning_rate": 3.275226625888435e-05, "loss": 0.8416, "step": 212820 }, { "epoch": 3.736547341069892, "grad_norm": 0.06033617562744012, "learning_rate": 3.274419414994621e-05, "loss": 0.8365, "step": 212830 }, { "epoch": 3.7367229059498936, "grad_norm": 0.04518845207965968, "learning_rate": 3.2736122905822136e-05, "loss": 0.8419, "step": 212840 }, { "epoch": 3.736898470829895, "grad_norm": 0.05095376681503745, "learning_rate": 3.272805252661453e-05, "loss": 0.8423, "step": 212850 }, { "epoch": 3.7370740357098966, "grad_norm": 0.045997131138368424, "learning_rate": 3.2719983012425565e-05, "loss": 0.8459, "step": 212860 }, { "epoch": 3.737249600589898, "grad_norm": 0.051280497066916245, "learning_rate": 3.271191436335749e-05, "loss": 0.837, "step": 212870 }, { "epoch": 3.7374251654698996, "grad_norm": 0.0431800636543663, "learning_rate": 3.270384657951257e-05, "loss": 0.8388, "step": 212880 }, { "epoch": 3.737600730349901, "grad_norm": 0.06959542722354593, "learning_rate": 3.2695779660992984e-05, "loss": 0.8377, "step": 212890 }, { "epoch": 3.737776295229902, "grad_norm": 0.05167459234588063, "learning_rate": 3.2687713607900995e-05, "loss": 0.8406, "step": 212900 }, { "epoch": 3.7379518601099035, "grad_norm": 0.05168351659881412, "learning_rate": 3.267964842033873e-05, "loss": 0.8359, "step": 212910 }, { "epoch": 3.738127424989905, "grad_norm": 0.06691859158525736, "learning_rate": 3.2671584098408413e-05, "loss": 0.8379, "step": 212920 }, { "epoch": 3.7383029898699065, "grad_norm": 0.047076622863356184, "learning_rate": 3.266352064221225e-05, "loss": 0.843, "step": 212930 }, { "epoch": 3.738478554749908, "grad_norm": 0.0663041058916244, "learning_rate": 3.265545805185232e-05, "loss": 0.831, "step": 212940 }, { "epoch": 3.738654119629909, "grad_norm": 0.05222548693285011, "learning_rate": 3.26473963274309e-05, "loss": 0.8387, "step": 212950 }, { "epoch": 3.7388296845099105, "grad_norm": 0.04962120821444543, "learning_rate": 3.263933546905002e-05, "loss": 0.8379, "step": 212960 }, { "epoch": 3.739005249389912, "grad_norm": 0.06169394914642336, "learning_rate": 3.26312754768119e-05, "loss": 0.837, "step": 212970 }, { "epoch": 3.7391808142699134, "grad_norm": 0.048581970174286976, "learning_rate": 3.26232163508186e-05, "loss": 0.8414, "step": 212980 }, { "epoch": 3.739356379149915, "grad_norm": 0.04754009649103007, "learning_rate": 3.2615158091172245e-05, "loss": 0.8377, "step": 212990 }, { "epoch": 3.7395319440299164, "grad_norm": 0.0863690383297236, "learning_rate": 3.260710069797498e-05, "loss": 0.8335, "step": 213000 }, { "epoch": 3.739707508909918, "grad_norm": 0.05645693513982186, "learning_rate": 3.259904417132879e-05, "loss": 0.8424, "step": 213010 }, { "epoch": 3.739883073789919, "grad_norm": 0.0576053572986354, "learning_rate": 3.2590988511335875e-05, "loss": 0.8317, "step": 213020 }, { "epoch": 3.7400586386699204, "grad_norm": 0.059334842213628676, "learning_rate": 3.258293371809822e-05, "loss": 0.8423, "step": 213030 }, { "epoch": 3.740234203549922, "grad_norm": 0.049204802788126306, "learning_rate": 3.257487979171794e-05, "loss": 0.8348, "step": 213040 }, { "epoch": 3.7404097684299233, "grad_norm": 0.06519948768962308, "learning_rate": 3.2566826732297014e-05, "loss": 0.8413, "step": 213050 }, { "epoch": 3.740585333309925, "grad_norm": 0.055494773664093056, "learning_rate": 3.255877453993751e-05, "loss": 0.8397, "step": 213060 }, { "epoch": 3.740760898189926, "grad_norm": 0.053377860562986205, "learning_rate": 3.255072321474148e-05, "loss": 0.8373, "step": 213070 }, { "epoch": 3.7409364630699273, "grad_norm": 0.05359409376772787, "learning_rate": 3.254267275681088e-05, "loss": 0.8492, "step": 213080 }, { "epoch": 3.741112027949929, "grad_norm": 0.04834014408127544, "learning_rate": 3.253462316624773e-05, "loss": 0.8419, "step": 213090 }, { "epoch": 3.7412875928299303, "grad_norm": 0.043912593123834415, "learning_rate": 3.252657444315405e-05, "loss": 0.8419, "step": 213100 }, { "epoch": 3.7414631577099318, "grad_norm": 0.04785295730062014, "learning_rate": 3.2518526587631745e-05, "loss": 0.8447, "step": 213110 }, { "epoch": 3.7416387225899332, "grad_norm": 0.05574305330249405, "learning_rate": 3.251047959978289e-05, "loss": 0.8457, "step": 213120 }, { "epoch": 3.7418142874699347, "grad_norm": 0.06981787808186828, "learning_rate": 3.250243347970935e-05, "loss": 0.8367, "step": 213130 }, { "epoch": 3.7419898523499358, "grad_norm": 0.05365835055250077, "learning_rate": 3.2494388227513143e-05, "loss": 0.8447, "step": 213140 }, { "epoch": 3.7421654172299372, "grad_norm": 0.0645492551735919, "learning_rate": 3.2486343843296135e-05, "loss": 0.8353, "step": 213150 }, { "epoch": 3.7423409821099387, "grad_norm": 0.04144643397756968, "learning_rate": 3.247830032716027e-05, "loss": 0.846, "step": 213160 }, { "epoch": 3.74251654698994, "grad_norm": 0.05608663679280813, "learning_rate": 3.247025767920751e-05, "loss": 0.8427, "step": 213170 }, { "epoch": 3.7426921118699417, "grad_norm": 0.05051438938773103, "learning_rate": 3.2462215899539656e-05, "loss": 0.8462, "step": 213180 }, { "epoch": 3.7428676767499427, "grad_norm": 0.04994025353065721, "learning_rate": 3.245417498825874e-05, "loss": 0.8372, "step": 213190 }, { "epoch": 3.7430432416299446, "grad_norm": 0.07026939395869763, "learning_rate": 3.2446134945466504e-05, "loss": 0.8382, "step": 213200 }, { "epoch": 3.7432188065099457, "grad_norm": 0.044959209177840215, "learning_rate": 3.243809577126492e-05, "loss": 0.8406, "step": 213210 }, { "epoch": 3.743394371389947, "grad_norm": 0.05531900276404251, "learning_rate": 3.243005746575578e-05, "loss": 0.8441, "step": 213220 }, { "epoch": 3.7435699362699486, "grad_norm": 0.05576124121854887, "learning_rate": 3.242202002904094e-05, "loss": 0.84, "step": 213230 }, { "epoch": 3.74374550114995, "grad_norm": 0.05664139272585752, "learning_rate": 3.2413983461222286e-05, "loss": 0.8428, "step": 213240 }, { "epoch": 3.7439210660299516, "grad_norm": 0.05297850456416652, "learning_rate": 3.240594776240158e-05, "loss": 0.8443, "step": 213250 }, { "epoch": 3.7440966309099526, "grad_norm": 0.05494653088553492, "learning_rate": 3.239791293268065e-05, "loss": 0.8387, "step": 213260 }, { "epoch": 3.744272195789954, "grad_norm": 0.04772555079806115, "learning_rate": 3.238987897216135e-05, "loss": 0.8419, "step": 213270 }, { "epoch": 3.7444477606699555, "grad_norm": 0.050993507986515925, "learning_rate": 3.238184588094537e-05, "loss": 0.8416, "step": 213280 }, { "epoch": 3.744623325549957, "grad_norm": 0.04668356267232632, "learning_rate": 3.237381365913462e-05, "loss": 0.8347, "step": 213290 }, { "epoch": 3.7447988904299585, "grad_norm": 0.05747486312542385, "learning_rate": 3.236578230683076e-05, "loss": 0.8365, "step": 213300 }, { "epoch": 3.7449744553099595, "grad_norm": 0.048419427879040335, "learning_rate": 3.235775182413563e-05, "loss": 0.8392, "step": 213310 }, { "epoch": 3.7451500201899615, "grad_norm": 0.05188142097898497, "learning_rate": 3.2349722211150914e-05, "loss": 0.8394, "step": 213320 }, { "epoch": 3.7453255850699625, "grad_norm": 0.060438773503954095, "learning_rate": 3.234169346797836e-05, "loss": 0.8385, "step": 213330 }, { "epoch": 3.745501149949964, "grad_norm": 0.06562432787830273, "learning_rate": 3.233366559471975e-05, "loss": 0.8307, "step": 213340 }, { "epoch": 3.7456767148299654, "grad_norm": 0.05596276046089051, "learning_rate": 3.232563859147669e-05, "loss": 0.8301, "step": 213350 }, { "epoch": 3.745852279709967, "grad_norm": 0.05111357869950322, "learning_rate": 3.231761245835102e-05, "loss": 0.8448, "step": 213360 }, { "epoch": 3.7460278445899684, "grad_norm": 0.06104920430788736, "learning_rate": 3.2309587195444326e-05, "loss": 0.8436, "step": 213370 }, { "epoch": 3.7462034094699694, "grad_norm": 0.04315939170816055, "learning_rate": 3.2301562802858355e-05, "loss": 0.8367, "step": 213380 }, { "epoch": 3.746378974349971, "grad_norm": 0.05121739738738398, "learning_rate": 3.229353928069472e-05, "loss": 0.8396, "step": 213390 }, { "epoch": 3.7465545392299724, "grad_norm": 0.04762320389251111, "learning_rate": 3.228551662905511e-05, "loss": 0.8336, "step": 213400 }, { "epoch": 3.746730104109974, "grad_norm": 0.05264540865600846, "learning_rate": 3.2277494848041215e-05, "loss": 0.8389, "step": 213410 }, { "epoch": 3.7469056689899753, "grad_norm": 0.05206159906368804, "learning_rate": 3.226947393775459e-05, "loss": 0.8378, "step": 213420 }, { "epoch": 3.7470812338699764, "grad_norm": 0.04967815024854766, "learning_rate": 3.226145389829689e-05, "loss": 0.8349, "step": 213430 }, { "epoch": 3.7472567987499783, "grad_norm": 0.05500309692702122, "learning_rate": 3.2253434729769786e-05, "loss": 0.8509, "step": 213440 }, { "epoch": 3.7474323636299793, "grad_norm": 0.06060620454170647, "learning_rate": 3.224541643227478e-05, "loss": 0.835, "step": 213450 }, { "epoch": 3.747607928509981, "grad_norm": 0.09249681476633348, "learning_rate": 3.2237399005913586e-05, "loss": 0.8343, "step": 213460 }, { "epoch": 3.7477834933899823, "grad_norm": 0.04816263909446041, "learning_rate": 3.222938245078769e-05, "loss": 0.8302, "step": 213470 }, { "epoch": 3.7479590582699838, "grad_norm": 0.052577426731901, "learning_rate": 3.222136676699873e-05, "loss": 0.8472, "step": 213480 }, { "epoch": 3.7481346231499852, "grad_norm": 0.054376931701859796, "learning_rate": 3.221335195464821e-05, "loss": 0.8413, "step": 213490 }, { "epoch": 3.7483101880299863, "grad_norm": 0.0614935105535678, "learning_rate": 3.2205338013837705e-05, "loss": 0.8402, "step": 213500 }, { "epoch": 3.7484857529099878, "grad_norm": 0.04978736210627789, "learning_rate": 3.2197324944668765e-05, "loss": 0.8436, "step": 213510 }, { "epoch": 3.7486613177899892, "grad_norm": 0.06594781793569407, "learning_rate": 3.2189312747242864e-05, "loss": 0.8381, "step": 213520 }, { "epoch": 3.7488368826699907, "grad_norm": 0.07056900841448353, "learning_rate": 3.21813014216616e-05, "loss": 0.8386, "step": 213530 }, { "epoch": 3.749012447549992, "grad_norm": 0.06395671849155876, "learning_rate": 3.217329096802642e-05, "loss": 0.8298, "step": 213540 }, { "epoch": 3.7491880124299937, "grad_norm": 0.047459862464976334, "learning_rate": 3.216528138643886e-05, "loss": 0.8372, "step": 213550 }, { "epoch": 3.749363577309995, "grad_norm": 0.04704212463139129, "learning_rate": 3.215727267700034e-05, "loss": 0.8404, "step": 213560 }, { "epoch": 3.749539142189996, "grad_norm": 0.056722427534958646, "learning_rate": 3.214926483981237e-05, "loss": 0.8344, "step": 213570 }, { "epoch": 3.7497147070699977, "grad_norm": 0.04955617832678936, "learning_rate": 3.2141257874976455e-05, "loss": 0.8459, "step": 213580 }, { "epoch": 3.749890271949999, "grad_norm": 0.06899175510832217, "learning_rate": 3.213325178259396e-05, "loss": 0.8327, "step": 213590 }, { "epoch": 3.7500658368300006, "grad_norm": 0.04705441625306072, "learning_rate": 3.212524656276636e-05, "loss": 0.8418, "step": 213600 }, { "epoch": 3.750241401710002, "grad_norm": 0.046864475198779516, "learning_rate": 3.211724221559511e-05, "loss": 0.8454, "step": 213610 }, { "epoch": 3.750416966590003, "grad_norm": 0.05860389818951293, "learning_rate": 3.2109238741181555e-05, "loss": 0.8465, "step": 213620 }, { "epoch": 3.7505925314700046, "grad_norm": 0.05357046673281773, "learning_rate": 3.21012361396272e-05, "loss": 0.8417, "step": 213630 }, { "epoch": 3.750768096350006, "grad_norm": 0.05986542209869112, "learning_rate": 3.209323441103336e-05, "loss": 0.8444, "step": 213640 }, { "epoch": 3.7509436612300076, "grad_norm": 0.054366022337615866, "learning_rate": 3.208523355550148e-05, "loss": 0.8398, "step": 213650 }, { "epoch": 3.751119226110009, "grad_norm": 0.0506811598667191, "learning_rate": 3.207723357313286e-05, "loss": 0.8425, "step": 213660 }, { "epoch": 3.7512947909900105, "grad_norm": 0.04125123623459474, "learning_rate": 3.206923446402891e-05, "loss": 0.8464, "step": 213670 }, { "epoch": 3.751470355870012, "grad_norm": 0.045597924349650196, "learning_rate": 3.2061236228290994e-05, "loss": 0.8393, "step": 213680 }, { "epoch": 3.751645920750013, "grad_norm": 0.04328383646848937, "learning_rate": 3.205323886602036e-05, "loss": 0.8335, "step": 213690 }, { "epoch": 3.7518214856300145, "grad_norm": 0.06740286412648847, "learning_rate": 3.2045242377318474e-05, "loss": 0.8352, "step": 213700 }, { "epoch": 3.751997050510016, "grad_norm": 0.04099787781398983, "learning_rate": 3.2037246762286553e-05, "loss": 0.8358, "step": 213710 }, { "epoch": 3.7521726153900175, "grad_norm": 0.04985108823289829, "learning_rate": 3.202925202102596e-05, "loss": 0.8355, "step": 213720 }, { "epoch": 3.752348180270019, "grad_norm": 0.07003135922803369, "learning_rate": 3.202125815363792e-05, "loss": 0.8348, "step": 213730 }, { "epoch": 3.75252374515002, "grad_norm": 0.06644724657068816, "learning_rate": 3.2013265160223775e-05, "loss": 0.8427, "step": 213740 }, { "epoch": 3.7526993100300214, "grad_norm": 0.05447165400608822, "learning_rate": 3.2005273040884806e-05, "loss": 0.8327, "step": 213750 }, { "epoch": 3.752874874910023, "grad_norm": 0.07998833882818103, "learning_rate": 3.199728179572222e-05, "loss": 0.8347, "step": 213760 }, { "epoch": 3.7530504397900244, "grad_norm": 0.04143686460733736, "learning_rate": 3.1989291424837294e-05, "loss": 0.8404, "step": 213770 }, { "epoch": 3.753226004670026, "grad_norm": 0.05703999206148614, "learning_rate": 3.1981301928331306e-05, "loss": 0.8356, "step": 213780 }, { "epoch": 3.7534015695500274, "grad_norm": 0.05821492262757193, "learning_rate": 3.1973313306305386e-05, "loss": 0.8354, "step": 213790 }, { "epoch": 3.753577134430029, "grad_norm": 0.0558246106230869, "learning_rate": 3.196532555886088e-05, "loss": 0.8439, "step": 213800 }, { "epoch": 3.75375269931003, "grad_norm": 0.046871973928098526, "learning_rate": 3.195733868609889e-05, "loss": 0.8479, "step": 213810 }, { "epoch": 3.7539282641900313, "grad_norm": 0.053174997930230984, "learning_rate": 3.19493526881207e-05, "loss": 0.8448, "step": 213820 }, { "epoch": 3.754103829070033, "grad_norm": 0.06634945551396064, "learning_rate": 3.19413675650274e-05, "loss": 0.8444, "step": 213830 }, { "epoch": 3.7542793939500343, "grad_norm": 0.08395378315318411, "learning_rate": 3.193338331692021e-05, "loss": 0.8417, "step": 213840 }, { "epoch": 3.7544549588300358, "grad_norm": 0.053491354587455756, "learning_rate": 3.192539994390033e-05, "loss": 0.8439, "step": 213850 }, { "epoch": 3.754630523710037, "grad_norm": 0.04519558021713755, "learning_rate": 3.19174174460688e-05, "loss": 0.84, "step": 213860 }, { "epoch": 3.7548060885900387, "grad_norm": 0.05113229809872996, "learning_rate": 3.190943582352689e-05, "loss": 0.8407, "step": 213870 }, { "epoch": 3.7549816534700398, "grad_norm": 0.06001862333303167, "learning_rate": 3.190145507637564e-05, "loss": 0.8438, "step": 213880 }, { "epoch": 3.7551572183500412, "grad_norm": 0.06615345390117666, "learning_rate": 3.1893475204716236e-05, "loss": 0.8328, "step": 213890 }, { "epoch": 3.7553327832300427, "grad_norm": 0.05083237032673574, "learning_rate": 3.18854962086497e-05, "loss": 0.8456, "step": 213900 }, { "epoch": 3.755508348110044, "grad_norm": 0.05662274195860441, "learning_rate": 3.187751808827718e-05, "loss": 0.8331, "step": 213910 }, { "epoch": 3.7556839129900457, "grad_norm": 0.059098721563691926, "learning_rate": 3.18695408436998e-05, "loss": 0.8392, "step": 213920 }, { "epoch": 3.7558594778700467, "grad_norm": 0.05391597183858666, "learning_rate": 3.186156447501853e-05, "loss": 0.8403, "step": 213930 }, { "epoch": 3.756035042750048, "grad_norm": 0.044621503805228786, "learning_rate": 3.1853588982334504e-05, "loss": 0.841, "step": 213940 }, { "epoch": 3.7562106076300497, "grad_norm": 0.05258137856841265, "learning_rate": 3.184561436574878e-05, "loss": 0.8399, "step": 213950 }, { "epoch": 3.756386172510051, "grad_norm": 0.0447774772178877, "learning_rate": 3.183764062536232e-05, "loss": 0.8484, "step": 213960 }, { "epoch": 3.7565617373900526, "grad_norm": 0.04956275412081876, "learning_rate": 3.182966776127626e-05, "loss": 0.8429, "step": 213970 }, { "epoch": 3.7567373022700536, "grad_norm": 0.03885121968021897, "learning_rate": 3.1821695773591545e-05, "loss": 0.8404, "step": 213980 }, { "epoch": 3.7569128671500556, "grad_norm": 0.04748589779999779, "learning_rate": 3.181372466240923e-05, "loss": 0.8439, "step": 213990 }, { "epoch": 3.7570884320300566, "grad_norm": 0.0519219003196956, "learning_rate": 3.180575442783025e-05, "loss": 0.841, "step": 214000 }, { "epoch": 3.757263996910058, "grad_norm": 0.051529702831242134, "learning_rate": 3.179778506995562e-05, "loss": 0.8344, "step": 214010 }, { "epoch": 3.7574395617900596, "grad_norm": 0.047060446307170246, "learning_rate": 3.1789816588886364e-05, "loss": 0.8388, "step": 214020 }, { "epoch": 3.757615126670061, "grad_norm": 0.055677310971003126, "learning_rate": 3.1781848984723315e-05, "loss": 0.8431, "step": 214030 }, { "epoch": 3.7577906915500625, "grad_norm": 0.06897643085017577, "learning_rate": 3.1773882257567576e-05, "loss": 0.844, "step": 214040 }, { "epoch": 3.7579662564300635, "grad_norm": 0.0442353697057626, "learning_rate": 3.176591640751998e-05, "loss": 0.8446, "step": 214050 }, { "epoch": 3.758141821310065, "grad_norm": 0.057845810033544005, "learning_rate": 3.175795143468153e-05, "loss": 0.8376, "step": 214060 }, { "epoch": 3.7583173861900665, "grad_norm": 0.05713517601033358, "learning_rate": 3.1749987339153066e-05, "loss": 0.8468, "step": 214070 }, { "epoch": 3.758492951070068, "grad_norm": 0.04449143447031028, "learning_rate": 3.1742024121035534e-05, "loss": 0.8499, "step": 214080 }, { "epoch": 3.7586685159500695, "grad_norm": 0.05026651155529376, "learning_rate": 3.173406178042987e-05, "loss": 0.8418, "step": 214090 }, { "epoch": 3.7588440808300705, "grad_norm": 0.06450275738445041, "learning_rate": 3.1726100317436874e-05, "loss": 0.8316, "step": 214100 }, { "epoch": 3.7590196457100724, "grad_norm": 0.06434621798063817, "learning_rate": 3.1718139732157475e-05, "loss": 0.8405, "step": 214110 }, { "epoch": 3.7591952105900734, "grad_norm": 0.054184979828047944, "learning_rate": 3.171018002469254e-05, "loss": 0.8342, "step": 214120 }, { "epoch": 3.759370775470075, "grad_norm": 0.06918711935945493, "learning_rate": 3.170222119514286e-05, "loss": 0.8342, "step": 214130 }, { "epoch": 3.7595463403500764, "grad_norm": 0.0467718396242105, "learning_rate": 3.169426324360937e-05, "loss": 0.842, "step": 214140 }, { "epoch": 3.759721905230078, "grad_norm": 0.04757587984929311, "learning_rate": 3.168630617019282e-05, "loss": 0.8384, "step": 214150 }, { "epoch": 3.7598974701100794, "grad_norm": 0.0412468609287616, "learning_rate": 3.1678349974994084e-05, "loss": 0.8422, "step": 214160 }, { "epoch": 3.7600730349900804, "grad_norm": 0.06775811921951674, "learning_rate": 3.167039465811391e-05, "loss": 0.8447, "step": 214170 }, { "epoch": 3.760248599870082, "grad_norm": 0.06932886932636603, "learning_rate": 3.166244021965311e-05, "loss": 0.8433, "step": 214180 }, { "epoch": 3.7604241647500833, "grad_norm": 0.05087246654773243, "learning_rate": 3.165448665971253e-05, "loss": 0.8517, "step": 214190 }, { "epoch": 3.760599729630085, "grad_norm": 0.05445977732139236, "learning_rate": 3.164653397839282e-05, "loss": 0.834, "step": 214200 }, { "epoch": 3.7607752945100863, "grad_norm": 0.047695214394711354, "learning_rate": 3.163858217579488e-05, "loss": 0.8473, "step": 214210 }, { "epoch": 3.7609508593900873, "grad_norm": 0.05145803433670018, "learning_rate": 3.163063125201937e-05, "loss": 0.8387, "step": 214220 }, { "epoch": 3.7611264242700893, "grad_norm": 0.05226971287621462, "learning_rate": 3.162268120716708e-05, "loss": 0.833, "step": 214230 }, { "epoch": 3.7613019891500903, "grad_norm": 0.03938489541344219, "learning_rate": 3.16147320413387e-05, "loss": 0.843, "step": 214240 }, { "epoch": 3.7614775540300918, "grad_norm": 0.05448312895268502, "learning_rate": 3.160678375463495e-05, "loss": 0.8455, "step": 214250 }, { "epoch": 3.7616531189100932, "grad_norm": 0.050366819185102926, "learning_rate": 3.159883634715659e-05, "loss": 0.8498, "step": 214260 }, { "epoch": 3.7618286837900947, "grad_norm": 0.06605223411829785, "learning_rate": 3.159088981900423e-05, "loss": 0.8441, "step": 214270 }, { "epoch": 3.762004248670096, "grad_norm": 0.04857349802126919, "learning_rate": 3.15829441702786e-05, "loss": 0.8379, "step": 214280 }, { "epoch": 3.7621798135500972, "grad_norm": 0.07048388432624449, "learning_rate": 3.157499940108041e-05, "loss": 0.8407, "step": 214290 }, { "epoch": 3.7623553784300987, "grad_norm": 0.10332142879629053, "learning_rate": 3.156705551151021e-05, "loss": 0.8378, "step": 214300 }, { "epoch": 3.7625309433101, "grad_norm": 0.05030540057669689, "learning_rate": 3.1559112501668795e-05, "loss": 0.837, "step": 214310 }, { "epoch": 3.7627065081901017, "grad_norm": 0.049760810080321516, "learning_rate": 3.155117037165671e-05, "loss": 0.8397, "step": 214320 }, { "epoch": 3.762882073070103, "grad_norm": 0.0514411586564239, "learning_rate": 3.154322912157462e-05, "loss": 0.8309, "step": 214330 }, { "epoch": 3.7630576379501046, "grad_norm": 0.048846354078957446, "learning_rate": 3.1535288751523094e-05, "loss": 0.8423, "step": 214340 }, { "epoch": 3.763233202830106, "grad_norm": 0.05629231890525461, "learning_rate": 3.152734926160279e-05, "loss": 0.8359, "step": 214350 }, { "epoch": 3.763408767710107, "grad_norm": 0.05257035444178978, "learning_rate": 3.151941065191431e-05, "loss": 0.8381, "step": 214360 }, { "epoch": 3.7635843325901086, "grad_norm": 0.05054777424359432, "learning_rate": 3.151147292255815e-05, "loss": 0.8379, "step": 214370 }, { "epoch": 3.76375989747011, "grad_norm": 0.053490764587493314, "learning_rate": 3.1503536073635006e-05, "loss": 0.8384, "step": 214380 }, { "epoch": 3.7639354623501116, "grad_norm": 0.06545251587475875, "learning_rate": 3.1495600105245345e-05, "loss": 0.8355, "step": 214390 }, { "epoch": 3.764111027230113, "grad_norm": 0.053474648589710544, "learning_rate": 3.1487665017489785e-05, "loss": 0.8347, "step": 214400 }, { "epoch": 3.764286592110114, "grad_norm": 0.05340810821084325, "learning_rate": 3.14797308104688e-05, "loss": 0.8379, "step": 214410 }, { "epoch": 3.7644621569901155, "grad_norm": 0.06483030126410083, "learning_rate": 3.147179748428295e-05, "loss": 0.8389, "step": 214420 }, { "epoch": 3.764637721870117, "grad_norm": 0.05697506856417742, "learning_rate": 3.146386503903278e-05, "loss": 0.828, "step": 214430 }, { "epoch": 3.7648132867501185, "grad_norm": 0.05680427472198276, "learning_rate": 3.145593347481875e-05, "loss": 0.8379, "step": 214440 }, { "epoch": 3.76498885163012, "grad_norm": 0.053239357163208025, "learning_rate": 3.144800279174135e-05, "loss": 0.846, "step": 214450 }, { "epoch": 3.7651644165101215, "grad_norm": 0.04103739888452774, "learning_rate": 3.144007298990112e-05, "loss": 0.8424, "step": 214460 }, { "epoch": 3.765339981390123, "grad_norm": 0.05558846496694252, "learning_rate": 3.143214406939845e-05, "loss": 0.8391, "step": 214470 }, { "epoch": 3.765515546270124, "grad_norm": 0.04613545897139286, "learning_rate": 3.142421603033389e-05, "loss": 0.8364, "step": 214480 }, { "epoch": 3.7656911111501254, "grad_norm": 0.055878749250400296, "learning_rate": 3.141628887280782e-05, "loss": 0.837, "step": 214490 }, { "epoch": 3.765866676030127, "grad_norm": 0.04819806765361641, "learning_rate": 3.1408362596920745e-05, "loss": 0.8335, "step": 214500 }, { "epoch": 3.7660422409101284, "grad_norm": 0.05313837444408743, "learning_rate": 3.1400437202773006e-05, "loss": 0.8361, "step": 214510 }, { "epoch": 3.76621780579013, "grad_norm": 0.051149374024312154, "learning_rate": 3.139251269046508e-05, "loss": 0.8376, "step": 214520 }, { "epoch": 3.766393370670131, "grad_norm": 0.049805921886823756, "learning_rate": 3.138458906009739e-05, "loss": 0.8359, "step": 214530 }, { "epoch": 3.7665689355501324, "grad_norm": 0.05183940309262988, "learning_rate": 3.137666631177024e-05, "loss": 0.8405, "step": 214540 }, { "epoch": 3.766744500430134, "grad_norm": 0.05421425552293453, "learning_rate": 3.1368744445584125e-05, "loss": 0.8372, "step": 214550 }, { "epoch": 3.7669200653101353, "grad_norm": 0.06399373108711016, "learning_rate": 3.1360823461639335e-05, "loss": 0.8331, "step": 214560 }, { "epoch": 3.767095630190137, "grad_norm": 0.050621161299657415, "learning_rate": 3.135290336003628e-05, "loss": 0.8462, "step": 214570 }, { "epoch": 3.7672711950701383, "grad_norm": 0.05075418015990819, "learning_rate": 3.134498414087526e-05, "loss": 0.8369, "step": 214580 }, { "epoch": 3.76744675995014, "grad_norm": 0.05422739600880449, "learning_rate": 3.1337065804256644e-05, "loss": 0.8422, "step": 214590 }, { "epoch": 3.767622324830141, "grad_norm": 0.05295289122255324, "learning_rate": 3.132914835028079e-05, "loss": 0.8397, "step": 214600 }, { "epoch": 3.7677978897101423, "grad_norm": 0.05526757842744108, "learning_rate": 3.132123177904794e-05, "loss": 0.8352, "step": 214610 }, { "epoch": 3.7679734545901438, "grad_norm": 0.06328790620015426, "learning_rate": 3.1313316090658446e-05, "loss": 0.8373, "step": 214620 }, { "epoch": 3.7681490194701452, "grad_norm": 0.047944903709310815, "learning_rate": 3.130540128521261e-05, "loss": 0.8328, "step": 214630 }, { "epoch": 3.7683245843501467, "grad_norm": 0.04863315004914211, "learning_rate": 3.129748736281064e-05, "loss": 0.8406, "step": 214640 }, { "epoch": 3.7685001492301478, "grad_norm": 0.05292718383359451, "learning_rate": 3.128957432355293e-05, "loss": 0.8349, "step": 214650 }, { "epoch": 3.7686757141101497, "grad_norm": 0.06542805809791198, "learning_rate": 3.128166216753964e-05, "loss": 0.8359, "step": 214660 }, { "epoch": 3.7688512789901507, "grad_norm": 0.05390036070654228, "learning_rate": 3.1273750894871086e-05, "loss": 0.8382, "step": 214670 }, { "epoch": 3.769026843870152, "grad_norm": 0.05916077538144203, "learning_rate": 3.1265840505647435e-05, "loss": 0.8412, "step": 214680 }, { "epoch": 3.7692024087501537, "grad_norm": 0.04646706497880775, "learning_rate": 3.1257930999968954e-05, "loss": 0.839, "step": 214690 }, { "epoch": 3.769377973630155, "grad_norm": 0.0676954843135536, "learning_rate": 3.1250022377935877e-05, "loss": 0.851, "step": 214700 }, { "epoch": 3.7695535385101566, "grad_norm": 0.057352281986241906, "learning_rate": 3.124211463964833e-05, "loss": 0.8355, "step": 214710 }, { "epoch": 3.7697291033901577, "grad_norm": 0.06521074955126209, "learning_rate": 3.1234207785206616e-05, "loss": 0.8409, "step": 214720 }, { "epoch": 3.769904668270159, "grad_norm": 0.07554077281564216, "learning_rate": 3.122630181471083e-05, "loss": 0.8385, "step": 214730 }, { "epoch": 3.7700802331501606, "grad_norm": 0.05582010746249529, "learning_rate": 3.121839672826121e-05, "loss": 0.8474, "step": 214740 }, { "epoch": 3.770255798030162, "grad_norm": 0.056577948435699035, "learning_rate": 3.121049252595784e-05, "loss": 0.8403, "step": 214750 }, { "epoch": 3.7704313629101636, "grad_norm": 0.05032330714640675, "learning_rate": 3.12025892079009e-05, "loss": 0.8347, "step": 214760 }, { "epoch": 3.7706069277901646, "grad_norm": 0.046431535376464786, "learning_rate": 3.119468677419057e-05, "loss": 0.8507, "step": 214770 }, { "epoch": 3.7707824926701665, "grad_norm": 0.05384863718373892, "learning_rate": 3.1186785224926906e-05, "loss": 0.8422, "step": 214780 }, { "epoch": 3.7709580575501676, "grad_norm": 0.05168688932333141, "learning_rate": 3.1178884560210044e-05, "loss": 0.8386, "step": 214790 }, { "epoch": 3.771133622430169, "grad_norm": 0.04699318020312147, "learning_rate": 3.117098478014014e-05, "loss": 0.8457, "step": 214800 }, { "epoch": 3.7713091873101705, "grad_norm": 0.06316851576965823, "learning_rate": 3.116308588481717e-05, "loss": 0.8395, "step": 214810 }, { "epoch": 3.771484752190172, "grad_norm": 0.06176929971716442, "learning_rate": 3.1155187874341344e-05, "loss": 0.8321, "step": 214820 }, { "epoch": 3.7716603170701735, "grad_norm": 0.052677107687728625, "learning_rate": 3.114729074881265e-05, "loss": 0.8487, "step": 214830 }, { "epoch": 3.7718358819501745, "grad_norm": 0.05965270599862889, "learning_rate": 3.11393945083312e-05, "loss": 0.8376, "step": 214840 }, { "epoch": 3.772011446830176, "grad_norm": 0.06299103570941512, "learning_rate": 3.113149915299698e-05, "loss": 0.8433, "step": 214850 }, { "epoch": 3.7721870117101775, "grad_norm": 0.050630840790215, "learning_rate": 3.112360468291006e-05, "loss": 0.8432, "step": 214860 }, { "epoch": 3.772362576590179, "grad_norm": 0.04546468742196688, "learning_rate": 3.111571109817048e-05, "loss": 0.838, "step": 214870 }, { "epoch": 3.7725381414701804, "grad_norm": 0.05498642226426605, "learning_rate": 3.110781839887817e-05, "loss": 0.8346, "step": 214880 }, { "epoch": 3.7727137063501814, "grad_norm": 0.05259495462475592, "learning_rate": 3.109992658513327e-05, "loss": 0.8361, "step": 214890 }, { "epoch": 3.7728892712301834, "grad_norm": 0.06251902781500743, "learning_rate": 3.109203565703566e-05, "loss": 0.8402, "step": 214900 }, { "epoch": 3.7730648361101844, "grad_norm": 0.06534369251106235, "learning_rate": 3.108414561468539e-05, "loss": 0.8451, "step": 214910 }, { "epoch": 3.773240400990186, "grad_norm": 0.04734509617916443, "learning_rate": 3.107625645818236e-05, "loss": 0.84, "step": 214920 }, { "epoch": 3.7734159658701873, "grad_norm": 0.0473443245470149, "learning_rate": 3.106836818762657e-05, "loss": 0.8462, "step": 214930 }, { "epoch": 3.773591530750189, "grad_norm": 0.07728783719469803, "learning_rate": 3.106048080311797e-05, "loss": 0.8368, "step": 214940 }, { "epoch": 3.7737670956301903, "grad_norm": 0.0622053457924769, "learning_rate": 3.105259430475647e-05, "loss": 0.8335, "step": 214950 }, { "epoch": 3.7739426605101913, "grad_norm": 0.045920607428883424, "learning_rate": 3.1044708692641995e-05, "loss": 0.843, "step": 214960 }, { "epoch": 3.774118225390193, "grad_norm": 0.06016679314445572, "learning_rate": 3.10368239668745e-05, "loss": 0.8423, "step": 214970 }, { "epoch": 3.7742937902701943, "grad_norm": 0.059098989339960696, "learning_rate": 3.102894012755379e-05, "loss": 0.836, "step": 214980 }, { "epoch": 3.7744693551501958, "grad_norm": 0.06331632995032194, "learning_rate": 3.1021057174779884e-05, "loss": 0.8355, "step": 214990 }, { "epoch": 3.7746449200301972, "grad_norm": 0.0455579665691247, "learning_rate": 3.101317510865257e-05, "loss": 0.8371, "step": 215000 }, { "epoch": 3.7748204849101987, "grad_norm": 0.056270028737222276, "learning_rate": 3.1005293929271756e-05, "loss": 0.8385, "step": 215010 }, { "epoch": 3.7749960497902, "grad_norm": 0.06619506342523751, "learning_rate": 3.099741363673726e-05, "loss": 0.8381, "step": 215020 }, { "epoch": 3.7751716146702012, "grad_norm": 0.052349983403559786, "learning_rate": 3.0989534231148954e-05, "loss": 0.8352, "step": 215030 }, { "epoch": 3.7753471795502027, "grad_norm": 0.061581640658897395, "learning_rate": 3.098165571260669e-05, "loss": 0.8386, "step": 215040 }, { "epoch": 3.775522744430204, "grad_norm": 0.05873422074326308, "learning_rate": 3.097377808121022e-05, "loss": 0.8406, "step": 215050 }, { "epoch": 3.7756983093102057, "grad_norm": 0.06118814351093374, "learning_rate": 3.0965901337059445e-05, "loss": 0.8412, "step": 215060 }, { "epoch": 3.775873874190207, "grad_norm": 0.05597853768959964, "learning_rate": 3.0958025480254094e-05, "loss": 0.8391, "step": 215070 }, { "epoch": 3.776049439070208, "grad_norm": 0.05044440052805297, "learning_rate": 3.095015051089402e-05, "loss": 0.8406, "step": 215080 }, { "epoch": 3.7762250039502097, "grad_norm": 0.09688000308909343, "learning_rate": 3.094227642907893e-05, "loss": 0.84, "step": 215090 }, { "epoch": 3.776400568830211, "grad_norm": 0.05919136246187355, "learning_rate": 3.0934403234908624e-05, "loss": 0.8397, "step": 215100 }, { "epoch": 3.7765761337102126, "grad_norm": 0.050213728958515946, "learning_rate": 3.0926530928482895e-05, "loss": 0.8362, "step": 215110 }, { "epoch": 3.776751698590214, "grad_norm": 0.05634246820129658, "learning_rate": 3.091865950990141e-05, "loss": 0.8428, "step": 215120 }, { "epoch": 3.7769272634702156, "grad_norm": 0.06825150029685785, "learning_rate": 3.091078897926394e-05, "loss": 0.8353, "step": 215130 }, { "epoch": 3.777102828350217, "grad_norm": 0.04967298662440711, "learning_rate": 3.0902919336670244e-05, "loss": 0.8373, "step": 215140 }, { "epoch": 3.777278393230218, "grad_norm": 0.045602355802380576, "learning_rate": 3.0895050582219926e-05, "loss": 0.8457, "step": 215150 }, { "epoch": 3.7774539581102196, "grad_norm": 0.04789382313825578, "learning_rate": 3.088718271601282e-05, "loss": 0.8415, "step": 215160 }, { "epoch": 3.777629522990221, "grad_norm": 0.041357313624311054, "learning_rate": 3.087931573814851e-05, "loss": 0.8435, "step": 215170 }, { "epoch": 3.7778050878702225, "grad_norm": 0.062122638352257675, "learning_rate": 3.087144964872674e-05, "loss": 0.8379, "step": 215180 }, { "epoch": 3.777980652750224, "grad_norm": 0.05875042793546874, "learning_rate": 3.086358444784712e-05, "loss": 0.8294, "step": 215190 }, { "epoch": 3.778156217630225, "grad_norm": 0.05914537848232735, "learning_rate": 3.0855720135609316e-05, "loss": 0.837, "step": 215200 }, { "epoch": 3.7783317825102265, "grad_norm": 0.06904318211613426, "learning_rate": 3.0847856712113006e-05, "loss": 0.8213, "step": 215210 }, { "epoch": 3.778507347390228, "grad_norm": 0.06829360851028685, "learning_rate": 3.0839994177457746e-05, "loss": 0.8406, "step": 215220 }, { "epoch": 3.7786829122702295, "grad_norm": 0.05679604729608953, "learning_rate": 3.083213253174327e-05, "loss": 0.8382, "step": 215230 }, { "epoch": 3.778858477150231, "grad_norm": 0.047989893449967846, "learning_rate": 3.082427177506909e-05, "loss": 0.842, "step": 215240 }, { "epoch": 3.7790340420302324, "grad_norm": 0.051064314686044304, "learning_rate": 3.081641190753485e-05, "loss": 0.836, "step": 215250 }, { "epoch": 3.779209606910234, "grad_norm": 0.05735085245991922, "learning_rate": 3.080855292924011e-05, "loss": 0.85, "step": 215260 }, { "epoch": 3.779385171790235, "grad_norm": 0.05731658130751086, "learning_rate": 3.0800694840284454e-05, "loss": 0.8302, "step": 215270 }, { "epoch": 3.7795607366702364, "grad_norm": 0.049953436565690255, "learning_rate": 3.079283764076748e-05, "loss": 0.8446, "step": 215280 }, { "epoch": 3.779736301550238, "grad_norm": 0.055522066832945555, "learning_rate": 3.0784981330788665e-05, "loss": 0.8422, "step": 215290 }, { "epoch": 3.7799118664302394, "grad_norm": 0.07063263770227932, "learning_rate": 3.07771259104476e-05, "loss": 0.8423, "step": 215300 }, { "epoch": 3.780087431310241, "grad_norm": 0.06584139501401648, "learning_rate": 3.076927137984384e-05, "loss": 0.8435, "step": 215310 }, { "epoch": 3.780262996190242, "grad_norm": 0.06019011430017405, "learning_rate": 3.076141773907681e-05, "loss": 0.8429, "step": 215320 }, { "epoch": 3.780438561070244, "grad_norm": 0.05542399200891208, "learning_rate": 3.075356498824614e-05, "loss": 0.8382, "step": 215330 }, { "epoch": 3.780614125950245, "grad_norm": 0.05859702644091471, "learning_rate": 3.074571312745124e-05, "loss": 0.8417, "step": 215340 }, { "epoch": 3.7807896908302463, "grad_norm": 0.05185774892913654, "learning_rate": 3.073786215679164e-05, "loss": 0.8412, "step": 215350 }, { "epoch": 3.7809652557102478, "grad_norm": 0.052722795215097554, "learning_rate": 3.073001207636677e-05, "loss": 0.8347, "step": 215360 }, { "epoch": 3.7811408205902493, "grad_norm": 0.05609874504596849, "learning_rate": 3.0722162886276104e-05, "loss": 0.832, "step": 215370 }, { "epoch": 3.7813163854702507, "grad_norm": 0.04909030779154749, "learning_rate": 3.071431458661914e-05, "loss": 0.8354, "step": 215380 }, { "epoch": 3.7814919503502518, "grad_norm": 0.06083218635190068, "learning_rate": 3.070646717749521e-05, "loss": 0.8407, "step": 215390 }, { "epoch": 3.7816675152302532, "grad_norm": 0.04981362465027949, "learning_rate": 3.0698620659003884e-05, "loss": 0.8371, "step": 215400 }, { "epoch": 3.7818430801102547, "grad_norm": 0.06316740045395042, "learning_rate": 3.069077503124447e-05, "loss": 0.8388, "step": 215410 }, { "epoch": 3.782018644990256, "grad_norm": 0.06442246711654936, "learning_rate": 3.0682930294316444e-05, "loss": 0.8433, "step": 215420 }, { "epoch": 3.7821942098702577, "grad_norm": 0.05881522721056101, "learning_rate": 3.0675086448319145e-05, "loss": 0.8309, "step": 215430 }, { "epoch": 3.7823697747502587, "grad_norm": 0.05754131021574009, "learning_rate": 3.066724349335197e-05, "loss": 0.8382, "step": 215440 }, { "epoch": 3.7825453396302606, "grad_norm": 0.04494329725005815, "learning_rate": 3.065940142951432e-05, "loss": 0.8323, "step": 215450 }, { "epoch": 3.7827209045102617, "grad_norm": 0.05403052969315236, "learning_rate": 3.065156025690552e-05, "loss": 0.8388, "step": 215460 }, { "epoch": 3.782896469390263, "grad_norm": 0.04719214404733549, "learning_rate": 3.0643719975624934e-05, "loss": 0.8361, "step": 215470 }, { "epoch": 3.7830720342702646, "grad_norm": 0.042984672109623534, "learning_rate": 3.0635880585771924e-05, "loss": 0.8392, "step": 215480 }, { "epoch": 3.783247599150266, "grad_norm": 0.06414685739086344, "learning_rate": 3.062804208744573e-05, "loss": 0.8381, "step": 215490 }, { "epoch": 3.7834231640302676, "grad_norm": 0.07203712267869565, "learning_rate": 3.062020448074579e-05, "loss": 0.8379, "step": 215500 }, { "epoch": 3.7835987289102686, "grad_norm": 0.045351065210622, "learning_rate": 3.0612367765771325e-05, "loss": 0.8382, "step": 215510 }, { "epoch": 3.78377429379027, "grad_norm": 0.044828275661691196, "learning_rate": 3.060453194262168e-05, "loss": 0.8452, "step": 215520 }, { "epoch": 3.7839498586702716, "grad_norm": 0.058324104224894084, "learning_rate": 3.059669701139607e-05, "loss": 0.835, "step": 215530 }, { "epoch": 3.784125423550273, "grad_norm": 0.04591848667297986, "learning_rate": 3.058886297219379e-05, "loss": 0.8415, "step": 215540 }, { "epoch": 3.7843009884302745, "grad_norm": 0.051982067761349504, "learning_rate": 3.058102982511416e-05, "loss": 0.8337, "step": 215550 }, { "epoch": 3.7844765533102755, "grad_norm": 0.07623465098646076, "learning_rate": 3.057319757025631e-05, "loss": 0.8356, "step": 215560 }, { "epoch": 3.7846521181902775, "grad_norm": 0.05385933264806462, "learning_rate": 3.056536620771961e-05, "loss": 0.8405, "step": 215570 }, { "epoch": 3.7848276830702785, "grad_norm": 0.054243158673813685, "learning_rate": 3.0557535737603184e-05, "loss": 0.8442, "step": 215580 }, { "epoch": 3.78500324795028, "grad_norm": 0.06176167312794796, "learning_rate": 3.0549706160006306e-05, "loss": 0.833, "step": 215590 }, { "epoch": 3.7851788128302815, "grad_norm": 0.05363638634939989, "learning_rate": 3.054187747502813e-05, "loss": 0.834, "step": 215600 }, { "epoch": 3.785354377710283, "grad_norm": 0.0513489153381955, "learning_rate": 3.0534049682767865e-05, "loss": 0.8378, "step": 215610 }, { "epoch": 3.7855299425902844, "grad_norm": 0.05073797200069079, "learning_rate": 3.0526222783324726e-05, "loss": 0.8392, "step": 215620 }, { "epoch": 3.7857055074702854, "grad_norm": 0.05720564721924178, "learning_rate": 3.0518396776797834e-05, "loss": 0.849, "step": 215630 }, { "epoch": 3.785881072350287, "grad_norm": 0.05785800554680885, "learning_rate": 3.0510571663286337e-05, "loss": 0.8381, "step": 215640 }, { "epoch": 3.7860566372302884, "grad_norm": 0.05768608986415673, "learning_rate": 3.0502747442889456e-05, "loss": 0.8428, "step": 215650 }, { "epoch": 3.78623220211029, "grad_norm": 0.044682022768163, "learning_rate": 3.049492411570621e-05, "loss": 0.8453, "step": 215660 }, { "epoch": 3.7864077669902914, "grad_norm": 0.049629449531778835, "learning_rate": 3.0487101681835838e-05, "loss": 0.8419, "step": 215670 }, { "epoch": 3.7865833318702924, "grad_norm": 0.04659385074331223, "learning_rate": 3.0479280141377385e-05, "loss": 0.8401, "step": 215680 }, { "epoch": 3.7867588967502943, "grad_norm": 0.047276483028826685, "learning_rate": 3.047145949443e-05, "loss": 0.8338, "step": 215690 }, { "epoch": 3.7869344616302953, "grad_norm": 0.04481572044479173, "learning_rate": 3.046363974109269e-05, "loss": 0.8452, "step": 215700 }, { "epoch": 3.787110026510297, "grad_norm": 0.0504226049332478, "learning_rate": 3.04558208814646e-05, "loss": 0.8411, "step": 215710 }, { "epoch": 3.7872855913902983, "grad_norm": 0.046138093716134906, "learning_rate": 3.0448002915644814e-05, "loss": 0.8427, "step": 215720 }, { "epoch": 3.7874611562703, "grad_norm": 0.040137714957505934, "learning_rate": 3.0440185843732273e-05, "loss": 0.847, "step": 215730 }, { "epoch": 3.7876367211503013, "grad_norm": 0.05880731775692375, "learning_rate": 3.0432369665826178e-05, "loss": 0.8332, "step": 215740 }, { "epoch": 3.7878122860303023, "grad_norm": 0.05230460677304659, "learning_rate": 3.0424554382025443e-05, "loss": 0.8405, "step": 215750 }, { "epoch": 3.7879878509103038, "grad_norm": 0.0453767812425392, "learning_rate": 3.0416739992429168e-05, "loss": 0.8428, "step": 215760 }, { "epoch": 3.7881634157903052, "grad_norm": 0.048097577578783766, "learning_rate": 3.0408926497136292e-05, "loss": 0.8384, "step": 215770 }, { "epoch": 3.7883389806703067, "grad_norm": 0.05159412773793071, "learning_rate": 3.0401113896245838e-05, "loss": 0.8383, "step": 215780 }, { "epoch": 3.788514545550308, "grad_norm": 0.06519927067735892, "learning_rate": 3.0393302189856843e-05, "loss": 0.8427, "step": 215790 }, { "epoch": 3.7886901104303097, "grad_norm": 0.04688890583436825, "learning_rate": 3.0385491378068197e-05, "loss": 0.8388, "step": 215800 }, { "epoch": 3.788865675310311, "grad_norm": 0.04475959146942576, "learning_rate": 3.037768146097891e-05, "loss": 0.8406, "step": 215810 }, { "epoch": 3.789041240190312, "grad_norm": 0.05714126259530708, "learning_rate": 3.0369872438687974e-05, "loss": 0.8336, "step": 215820 }, { "epoch": 3.7892168050703137, "grad_norm": 0.053006246549597784, "learning_rate": 3.0362064311294224e-05, "loss": 0.8323, "step": 215830 }, { "epoch": 3.789392369950315, "grad_norm": 0.060909875352333456, "learning_rate": 3.035425707889672e-05, "loss": 0.8388, "step": 215840 }, { "epoch": 3.7895679348303166, "grad_norm": 0.0686159035642891, "learning_rate": 3.0346450741594284e-05, "loss": 0.8372, "step": 215850 }, { "epoch": 3.789743499710318, "grad_norm": 0.06706599858714622, "learning_rate": 3.0338645299485898e-05, "loss": 0.8324, "step": 215860 }, { "epoch": 3.789919064590319, "grad_norm": 0.05085721336997347, "learning_rate": 3.033084075267038e-05, "loss": 0.8424, "step": 215870 }, { "epoch": 3.7900946294703206, "grad_norm": 0.056098779542174446, "learning_rate": 3.0323037101246653e-05, "loss": 0.8332, "step": 215880 }, { "epoch": 3.790270194350322, "grad_norm": 0.04908236633510771, "learning_rate": 3.031523434531362e-05, "loss": 0.8387, "step": 215890 }, { "epoch": 3.7904457592303236, "grad_norm": 0.05344264463249456, "learning_rate": 3.0307432484970054e-05, "loss": 0.841, "step": 215900 }, { "epoch": 3.790621324110325, "grad_norm": 0.04800740411056466, "learning_rate": 3.0299631520314926e-05, "loss": 0.8375, "step": 215910 }, { "epoch": 3.7907968889903265, "grad_norm": 0.0517770488714822, "learning_rate": 3.0291831451446994e-05, "loss": 0.8495, "step": 215920 }, { "epoch": 3.790972453870328, "grad_norm": 0.05435176594216623, "learning_rate": 3.0284032278465135e-05, "loss": 0.8422, "step": 215930 }, { "epoch": 3.791148018750329, "grad_norm": 0.05652932896738733, "learning_rate": 3.027623400146811e-05, "loss": 0.8419, "step": 215940 }, { "epoch": 3.7913235836303305, "grad_norm": 0.042922287660263125, "learning_rate": 3.0268436620554753e-05, "loss": 0.8431, "step": 215950 }, { "epoch": 3.791499148510332, "grad_norm": 0.08675728623317822, "learning_rate": 3.0260640135823887e-05, "loss": 0.8399, "step": 215960 }, { "epoch": 3.7916747133903335, "grad_norm": 0.04695875546909925, "learning_rate": 3.0252844547374242e-05, "loss": 0.8399, "step": 215970 }, { "epoch": 3.791850278270335, "grad_norm": 0.04419327745122121, "learning_rate": 3.0245049855304607e-05, "loss": 0.8406, "step": 215980 }, { "epoch": 3.792025843150336, "grad_norm": 0.05171283888085386, "learning_rate": 3.02372560597138e-05, "loss": 0.8329, "step": 215990 }, { "epoch": 3.7922014080303375, "grad_norm": 0.045043946781931805, "learning_rate": 3.0229463160700444e-05, "loss": 0.8412, "step": 216000 }, { "epoch": 3.792376972910339, "grad_norm": 0.04745036642823334, "learning_rate": 3.0221671158363414e-05, "loss": 0.8328, "step": 216010 }, { "epoch": 3.7925525377903404, "grad_norm": 0.04430336122347092, "learning_rate": 3.0213880052801348e-05, "loss": 0.834, "step": 216020 }, { "epoch": 3.792728102670342, "grad_norm": 0.054788536892417916, "learning_rate": 3.0206089844113017e-05, "loss": 0.8424, "step": 216030 }, { "epoch": 3.7929036675503434, "grad_norm": 0.04705830014176271, "learning_rate": 3.0198300532397062e-05, "loss": 0.8355, "step": 216040 }, { "epoch": 3.793079232430345, "grad_norm": 0.05165207884799079, "learning_rate": 3.019051211775221e-05, "loss": 0.8458, "step": 216050 }, { "epoch": 3.793254797310346, "grad_norm": 0.06695726288812064, "learning_rate": 3.018272460027718e-05, "loss": 0.8407, "step": 216060 }, { "epoch": 3.7934303621903473, "grad_norm": 0.06348701039852789, "learning_rate": 3.0174937980070533e-05, "loss": 0.8391, "step": 216070 }, { "epoch": 3.793605927070349, "grad_norm": 0.05299575240985735, "learning_rate": 3.016715225723105e-05, "loss": 0.8408, "step": 216080 }, { "epoch": 3.7937814919503503, "grad_norm": 0.06974765748102253, "learning_rate": 3.0159367431857293e-05, "loss": 0.8342, "step": 216090 }, { "epoch": 3.793957056830352, "grad_norm": 0.06505903751834409, "learning_rate": 3.015158350404797e-05, "loss": 0.8414, "step": 216100 }, { "epoch": 3.794132621710353, "grad_norm": 0.05818494687857544, "learning_rate": 3.0143800473901623e-05, "loss": 0.8346, "step": 216110 }, { "epoch": 3.7943081865903547, "grad_norm": 0.04621913954323976, "learning_rate": 3.0136018341516908e-05, "loss": 0.8348, "step": 216120 }, { "epoch": 3.7944837514703558, "grad_norm": 0.04986290948578882, "learning_rate": 3.012823710699245e-05, "loss": 0.8374, "step": 216130 }, { "epoch": 3.7946593163503572, "grad_norm": 0.05761866035611304, "learning_rate": 3.012045677042678e-05, "loss": 0.8387, "step": 216140 }, { "epoch": 3.7948348812303587, "grad_norm": 0.060544201284411125, "learning_rate": 3.0112677331918504e-05, "loss": 0.8324, "step": 216150 }, { "epoch": 3.79501044611036, "grad_norm": 0.06106005084873182, "learning_rate": 3.0104898791566226e-05, "loss": 0.8403, "step": 216160 }, { "epoch": 3.7951860109903617, "grad_norm": 0.04713909400776089, "learning_rate": 3.00971211494684e-05, "loss": 0.8436, "step": 216170 }, { "epoch": 3.7953615758703627, "grad_norm": 0.05301026233147863, "learning_rate": 3.0089344405723703e-05, "loss": 0.842, "step": 216180 }, { "epoch": 3.795537140750364, "grad_norm": 0.045722882112358136, "learning_rate": 3.0081568560430574e-05, "loss": 0.8503, "step": 216190 }, { "epoch": 3.7957127056303657, "grad_norm": 0.04422667076540221, "learning_rate": 3.0073793613687585e-05, "loss": 0.8399, "step": 216200 }, { "epoch": 3.795888270510367, "grad_norm": 0.05702470594432797, "learning_rate": 3.00660195655932e-05, "loss": 0.8467, "step": 216210 }, { "epoch": 3.7960638353903686, "grad_norm": 0.0809340764196382, "learning_rate": 3.0058246416245945e-05, "loss": 0.8403, "step": 216220 }, { "epoch": 3.7962394002703697, "grad_norm": 0.05472050769910334, "learning_rate": 3.005047416574433e-05, "loss": 0.8388, "step": 216230 }, { "epoch": 3.7964149651503716, "grad_norm": 0.06071982031961426, "learning_rate": 3.0042702814186753e-05, "loss": 0.8442, "step": 216240 }, { "epoch": 3.7965905300303726, "grad_norm": 0.05327637295423789, "learning_rate": 3.0034932361671776e-05, "loss": 0.8487, "step": 216250 }, { "epoch": 3.796766094910374, "grad_norm": 0.07394347323106831, "learning_rate": 3.0027162808297777e-05, "loss": 0.8349, "step": 216260 }, { "epoch": 3.7969416597903756, "grad_norm": 0.05488228934149554, "learning_rate": 3.0019394154163265e-05, "loss": 0.8463, "step": 216270 }, { "epoch": 3.797117224670377, "grad_norm": 0.060324492110900796, "learning_rate": 3.0011626399366606e-05, "loss": 0.8398, "step": 216280 }, { "epoch": 3.7972927895503785, "grad_norm": 0.04740797918408799, "learning_rate": 3.0003859544006236e-05, "loss": 0.8354, "step": 216290 }, { "epoch": 3.7974683544303796, "grad_norm": 0.09224114113472429, "learning_rate": 2.9996093588180607e-05, "loss": 0.84, "step": 216300 }, { "epoch": 3.797643919310381, "grad_norm": 0.04333030648682951, "learning_rate": 2.9988328531988047e-05, "loss": 0.8396, "step": 216310 }, { "epoch": 3.7978194841903825, "grad_norm": 0.049587864642088135, "learning_rate": 2.9980564375526972e-05, "loss": 0.8409, "step": 216320 }, { "epoch": 3.797995049070384, "grad_norm": 0.04512311919726466, "learning_rate": 2.9972801118895794e-05, "loss": 0.8345, "step": 216330 }, { "epoch": 3.7981706139503855, "grad_norm": 0.05698511157083809, "learning_rate": 2.9965038762192764e-05, "loss": 0.8383, "step": 216340 }, { "epoch": 3.7983461788303865, "grad_norm": 0.06816185259066948, "learning_rate": 2.995727730551638e-05, "loss": 0.8436, "step": 216350 }, { "epoch": 3.7985217437103884, "grad_norm": 0.043120561561021335, "learning_rate": 2.9949516748964878e-05, "loss": 0.8412, "step": 216360 }, { "epoch": 3.7986973085903895, "grad_norm": 0.047531372703378816, "learning_rate": 2.9941757092636647e-05, "loss": 0.8454, "step": 216370 }, { "epoch": 3.798872873470391, "grad_norm": 0.052057346933477695, "learning_rate": 2.993399833662993e-05, "loss": 0.8405, "step": 216380 }, { "epoch": 3.7990484383503924, "grad_norm": 0.06443667393672588, "learning_rate": 2.992624048104309e-05, "loss": 0.8445, "step": 216390 }, { "epoch": 3.799224003230394, "grad_norm": 0.07379486440828018, "learning_rate": 2.9918483525974418e-05, "loss": 0.8379, "step": 216400 }, { "epoch": 3.7993995681103954, "grad_norm": 0.044012235940425255, "learning_rate": 2.9910727471522126e-05, "loss": 0.8416, "step": 216410 }, { "epoch": 3.7995751329903964, "grad_norm": 0.06407130833236857, "learning_rate": 2.9902972317784612e-05, "loss": 0.8344, "step": 216420 }, { "epoch": 3.799750697870398, "grad_norm": 0.06017323556711089, "learning_rate": 2.9895218064860024e-05, "loss": 0.8348, "step": 216430 }, { "epoch": 3.7999262627503994, "grad_norm": 0.06522120674104633, "learning_rate": 2.9887464712846692e-05, "loss": 0.8403, "step": 216440 }, { "epoch": 3.800101827630401, "grad_norm": 0.07172474265932748, "learning_rate": 2.9879712261842767e-05, "loss": 0.8401, "step": 216450 }, { "epoch": 3.8002773925104023, "grad_norm": 0.06420950050621067, "learning_rate": 2.987196071194652e-05, "loss": 0.8429, "step": 216460 }, { "epoch": 3.800452957390404, "grad_norm": 0.04872366409263709, "learning_rate": 2.98642100632562e-05, "loss": 0.8461, "step": 216470 }, { "epoch": 3.8006285222704053, "grad_norm": 0.05573915750218048, "learning_rate": 2.985646031586993e-05, "loss": 0.8378, "step": 216480 }, { "epoch": 3.8008040871504063, "grad_norm": 0.046346114220855786, "learning_rate": 2.9848711469885955e-05, "loss": 0.8439, "step": 216490 }, { "epoch": 3.8009796520304078, "grad_norm": 0.06462167109912452, "learning_rate": 2.9840963525402465e-05, "loss": 0.8407, "step": 216500 }, { "epoch": 3.8011552169104093, "grad_norm": 0.05667411028739348, "learning_rate": 2.9833216482517544e-05, "loss": 0.8464, "step": 216510 }, { "epoch": 3.8013307817904107, "grad_norm": 0.04863056416384048, "learning_rate": 2.982547034132947e-05, "loss": 0.8349, "step": 216520 }, { "epoch": 3.801506346670412, "grad_norm": 0.045643384446940015, "learning_rate": 2.98177251019363e-05, "loss": 0.8351, "step": 216530 }, { "epoch": 3.8016819115504132, "grad_norm": 0.06114555025814227, "learning_rate": 2.980998076443623e-05, "loss": 0.8463, "step": 216540 }, { "epoch": 3.8018574764304147, "grad_norm": 0.05158082667860177, "learning_rate": 2.98022373289273e-05, "loss": 0.8451, "step": 216550 }, { "epoch": 3.802033041310416, "grad_norm": 0.06181672117244222, "learning_rate": 2.9794494795507683e-05, "loss": 0.829, "step": 216560 }, { "epoch": 3.8022086061904177, "grad_norm": 0.04756833104005501, "learning_rate": 2.978675316427549e-05, "loss": 0.8457, "step": 216570 }, { "epoch": 3.802384171070419, "grad_norm": 0.060691207112528435, "learning_rate": 2.9779012435328724e-05, "loss": 0.8394, "step": 216580 }, { "epoch": 3.8025597359504206, "grad_norm": 0.05342490287842647, "learning_rate": 2.977127260876558e-05, "loss": 0.837, "step": 216590 }, { "epoch": 3.802735300830422, "grad_norm": 0.0445617363323546, "learning_rate": 2.9763533684684024e-05, "loss": 0.8444, "step": 216600 }, { "epoch": 3.802910865710423, "grad_norm": 0.06231984547932772, "learning_rate": 2.975579566318219e-05, "loss": 0.8263, "step": 216610 }, { "epoch": 3.8030864305904246, "grad_norm": 0.059800258591130535, "learning_rate": 2.974805854435804e-05, "loss": 0.8341, "step": 216620 }, { "epoch": 3.803261995470426, "grad_norm": 0.046250147139957384, "learning_rate": 2.9740322328309646e-05, "loss": 0.8419, "step": 216630 }, { "epoch": 3.8034375603504276, "grad_norm": 0.06319462832953164, "learning_rate": 2.973258701513506e-05, "loss": 0.8437, "step": 216640 }, { "epoch": 3.803613125230429, "grad_norm": 0.07758933135715074, "learning_rate": 2.9724852604932218e-05, "loss": 0.8471, "step": 216650 }, { "epoch": 3.80378869011043, "grad_norm": 0.055051234989771054, "learning_rate": 2.9717119097799155e-05, "loss": 0.8449, "step": 216660 }, { "epoch": 3.8039642549904316, "grad_norm": 0.03867482231691754, "learning_rate": 2.9709386493833892e-05, "loss": 0.8386, "step": 216670 }, { "epoch": 3.804139819870433, "grad_norm": 0.060432878383078655, "learning_rate": 2.9701654793134294e-05, "loss": 0.8423, "step": 216680 }, { "epoch": 3.8043153847504345, "grad_norm": 0.10601325263165481, "learning_rate": 2.969392399579847e-05, "loss": 0.8446, "step": 216690 }, { "epoch": 3.804490949630436, "grad_norm": 0.05723182107102692, "learning_rate": 2.968619410192424e-05, "loss": 0.8459, "step": 216700 }, { "epoch": 3.8046665145104375, "grad_norm": 0.13962706150904752, "learning_rate": 2.9678465111609643e-05, "loss": 0.8417, "step": 216710 }, { "epoch": 3.804842079390439, "grad_norm": 0.05100983371127837, "learning_rate": 2.967073702495254e-05, "loss": 0.8372, "step": 216720 }, { "epoch": 3.80501764427044, "grad_norm": 0.0479867968628532, "learning_rate": 2.966300984205086e-05, "loss": 0.8464, "step": 216730 }, { "epoch": 3.8051932091504415, "grad_norm": 0.051614516248306604, "learning_rate": 2.9655283563002533e-05, "loss": 0.843, "step": 216740 }, { "epoch": 3.805368774030443, "grad_norm": 0.0527799857992353, "learning_rate": 2.9647558187905395e-05, "loss": 0.8434, "step": 216750 }, { "epoch": 3.8055443389104444, "grad_norm": 0.057872429703993186, "learning_rate": 2.9639833716857428e-05, "loss": 0.8342, "step": 216760 }, { "epoch": 3.805719903790446, "grad_norm": 0.05133491071281844, "learning_rate": 2.9632110149956403e-05, "loss": 0.8415, "step": 216770 }, { "epoch": 3.805895468670447, "grad_norm": 0.05359066689784784, "learning_rate": 2.9624387487300265e-05, "loss": 0.849, "step": 216780 }, { "epoch": 3.806071033550449, "grad_norm": 0.0712960261676301, "learning_rate": 2.961666572898677e-05, "loss": 0.8435, "step": 216790 }, { "epoch": 3.80624659843045, "grad_norm": 0.04885324988916581, "learning_rate": 2.9608944875113815e-05, "loss": 0.8468, "step": 216800 }, { "epoch": 3.8064221633104514, "grad_norm": 0.057888907847736704, "learning_rate": 2.9601224925779235e-05, "loss": 0.8365, "step": 216810 }, { "epoch": 3.806597728190453, "grad_norm": 0.047553237305033684, "learning_rate": 2.959350588108079e-05, "loss": 0.8342, "step": 216820 }, { "epoch": 3.8067732930704543, "grad_norm": 0.060380486743219386, "learning_rate": 2.958578774111631e-05, "loss": 0.8295, "step": 216830 }, { "epoch": 3.806948857950456, "grad_norm": 0.04451877502300554, "learning_rate": 2.9578070505983617e-05, "loss": 0.8408, "step": 216840 }, { "epoch": 3.807124422830457, "grad_norm": 0.05396986283160004, "learning_rate": 2.9570354175780392e-05, "loss": 0.8372, "step": 216850 }, { "epoch": 3.8072999877104583, "grad_norm": 0.05160566057346454, "learning_rate": 2.9562638750604532e-05, "loss": 0.8392, "step": 216860 }, { "epoch": 3.8074755525904598, "grad_norm": 0.06213281791003979, "learning_rate": 2.9554924230553698e-05, "loss": 0.8373, "step": 216870 }, { "epoch": 3.8076511174704613, "grad_norm": 0.05162443440737965, "learning_rate": 2.9547210615725706e-05, "loss": 0.842, "step": 216880 }, { "epoch": 3.8078266823504627, "grad_norm": 0.05239839618028367, "learning_rate": 2.9539497906218205e-05, "loss": 0.8383, "step": 216890 }, { "epoch": 3.8080022472304638, "grad_norm": 0.04499409468630796, "learning_rate": 2.953178610212897e-05, "loss": 0.8382, "step": 216900 }, { "epoch": 3.8081778121104657, "grad_norm": 0.05886410355732132, "learning_rate": 2.952407520355574e-05, "loss": 0.8399, "step": 216910 }, { "epoch": 3.8083533769904667, "grad_norm": 0.054736348826220575, "learning_rate": 2.9516365210596096e-05, "loss": 0.8411, "step": 216920 }, { "epoch": 3.808528941870468, "grad_norm": 0.054152477206964256, "learning_rate": 2.9508656123347883e-05, "loss": 0.8314, "step": 216930 }, { "epoch": 3.8087045067504697, "grad_norm": 0.04135410457107819, "learning_rate": 2.9500947941908656e-05, "loss": 0.8368, "step": 216940 }, { "epoch": 3.808880071630471, "grad_norm": 0.06515203382446555, "learning_rate": 2.9493240666376155e-05, "loss": 0.8344, "step": 216950 }, { "epoch": 3.8090556365104726, "grad_norm": 0.04954539049885234, "learning_rate": 2.948553429684798e-05, "loss": 0.8418, "step": 216960 }, { "epoch": 3.8092312013904737, "grad_norm": 0.05812799113499499, "learning_rate": 2.9477828833421772e-05, "loss": 0.8359, "step": 216970 }, { "epoch": 3.809406766270475, "grad_norm": 0.04232553363656654, "learning_rate": 2.9470124276195228e-05, "loss": 0.8348, "step": 216980 }, { "epoch": 3.8095823311504766, "grad_norm": 0.06253004756238967, "learning_rate": 2.946242062526587e-05, "loss": 0.8408, "step": 216990 }, { "epoch": 3.809757896030478, "grad_norm": 0.04760882881421293, "learning_rate": 2.9454717880731365e-05, "loss": 0.84, "step": 217000 }, { "epoch": 3.8099334609104796, "grad_norm": 0.04942275574157079, "learning_rate": 2.9447016042689317e-05, "loss": 0.8399, "step": 217010 }, { "epoch": 3.8101090257904806, "grad_norm": 0.05824313618163893, "learning_rate": 2.9439315111237238e-05, "loss": 0.8299, "step": 217020 }, { "epoch": 3.8102845906704825, "grad_norm": 0.05995559316748785, "learning_rate": 2.9431615086472806e-05, "loss": 0.8352, "step": 217030 }, { "epoch": 3.8104601555504836, "grad_norm": 0.04696841576170053, "learning_rate": 2.94239159684935e-05, "loss": 0.8346, "step": 217040 }, { "epoch": 3.810635720430485, "grad_norm": 0.056660098500586516, "learning_rate": 2.941621775739692e-05, "loss": 0.8381, "step": 217050 }, { "epoch": 3.8108112853104865, "grad_norm": 0.05908190047778835, "learning_rate": 2.9408520453280557e-05, "loss": 0.839, "step": 217060 }, { "epoch": 3.810986850190488, "grad_norm": 0.06124991396504309, "learning_rate": 2.940082405624196e-05, "loss": 0.8317, "step": 217070 }, { "epoch": 3.8111624150704895, "grad_norm": 0.05614191039614267, "learning_rate": 2.9393128566378675e-05, "loss": 0.8404, "step": 217080 }, { "epoch": 3.8113379799504905, "grad_norm": 0.06362326572803861, "learning_rate": 2.938543398378811e-05, "loss": 0.8378, "step": 217090 }, { "epoch": 3.811513544830492, "grad_norm": 0.06862774612917868, "learning_rate": 2.937774030856788e-05, "loss": 0.8461, "step": 217100 }, { "epoch": 3.8116891097104935, "grad_norm": 0.05333532586253141, "learning_rate": 2.9370047540815375e-05, "loss": 0.8416, "step": 217110 }, { "epoch": 3.811864674590495, "grad_norm": 0.044566917871036504, "learning_rate": 2.9362355680628133e-05, "loss": 0.8422, "step": 217120 }, { "epoch": 3.8120402394704964, "grad_norm": 0.05749830654168588, "learning_rate": 2.935466472810354e-05, "loss": 0.8418, "step": 217130 }, { "epoch": 3.8122158043504975, "grad_norm": 0.05331162033863563, "learning_rate": 2.9346974683339078e-05, "loss": 0.8424, "step": 217140 }, { "epoch": 3.8123913692304994, "grad_norm": 0.04901158490204302, "learning_rate": 2.93392855464322e-05, "loss": 0.829, "step": 217150 }, { "epoch": 3.8125669341105004, "grad_norm": 0.05093363608691648, "learning_rate": 2.9331597317480285e-05, "loss": 0.8419, "step": 217160 }, { "epoch": 3.812742498990502, "grad_norm": 0.06479845150369613, "learning_rate": 2.932390999658077e-05, "loss": 0.8401, "step": 217170 }, { "epoch": 3.8129180638705034, "grad_norm": 0.046874291255262664, "learning_rate": 2.9316223583831088e-05, "loss": 0.8363, "step": 217180 }, { "epoch": 3.813093628750505, "grad_norm": 0.05260339360580579, "learning_rate": 2.930853807932851e-05, "loss": 0.8405, "step": 217190 }, { "epoch": 3.8132691936305063, "grad_norm": 0.05257058887720699, "learning_rate": 2.9300853483170572e-05, "loss": 0.8382, "step": 217200 }, { "epoch": 3.8134447585105073, "grad_norm": 0.053998082252720105, "learning_rate": 2.9293169795454527e-05, "loss": 0.8406, "step": 217210 }, { "epoch": 3.813620323390509, "grad_norm": 0.05686674069052367, "learning_rate": 2.9285487016277792e-05, "loss": 0.832, "step": 217220 }, { "epoch": 3.8137958882705103, "grad_norm": 0.05392356848327632, "learning_rate": 2.9277805145737653e-05, "loss": 0.839, "step": 217230 }, { "epoch": 3.813971453150512, "grad_norm": 0.05553312844528772, "learning_rate": 2.9270124183931465e-05, "loss": 0.8457, "step": 217240 }, { "epoch": 3.8141470180305133, "grad_norm": 0.07503974622572539, "learning_rate": 2.9262444130956586e-05, "loss": 0.8372, "step": 217250 }, { "epoch": 3.8143225829105147, "grad_norm": 0.05815113545577752, "learning_rate": 2.9254764986910214e-05, "loss": 0.843, "step": 217260 }, { "epoch": 3.814498147790516, "grad_norm": 0.046662134764316666, "learning_rate": 2.924708675188979e-05, "loss": 0.8413, "step": 217270 }, { "epoch": 3.8146737126705172, "grad_norm": 0.06136568561607182, "learning_rate": 2.9239409425992498e-05, "loss": 0.8435, "step": 217280 }, { "epoch": 3.8148492775505187, "grad_norm": 0.04496130311075327, "learning_rate": 2.9231733009315672e-05, "loss": 0.8421, "step": 217290 }, { "epoch": 3.81502484243052, "grad_norm": 0.05251405047814573, "learning_rate": 2.9224057501956508e-05, "loss": 0.8327, "step": 217300 }, { "epoch": 3.8152004073105217, "grad_norm": 0.05457367396153035, "learning_rate": 2.9216382904012302e-05, "loss": 0.8383, "step": 217310 }, { "epoch": 3.815375972190523, "grad_norm": 0.05138439740947332, "learning_rate": 2.92087092155803e-05, "loss": 0.8468, "step": 217320 }, { "epoch": 3.815551537070524, "grad_norm": 0.05075604632020879, "learning_rate": 2.9201036436757695e-05, "loss": 0.8359, "step": 217330 }, { "epoch": 3.8157271019505257, "grad_norm": 0.05506286422402078, "learning_rate": 2.91933645676417e-05, "loss": 0.8415, "step": 217340 }, { "epoch": 3.815902666830527, "grad_norm": 0.053542496753468574, "learning_rate": 2.9185693608329584e-05, "loss": 0.833, "step": 217350 }, { "epoch": 3.8160782317105286, "grad_norm": 0.04709276070923575, "learning_rate": 2.917802355891842e-05, "loss": 0.8485, "step": 217360 }, { "epoch": 3.81625379659053, "grad_norm": 0.04965160060176507, "learning_rate": 2.9170354419505534e-05, "loss": 0.8371, "step": 217370 }, { "epoch": 3.8164293614705316, "grad_norm": 0.05830861227375066, "learning_rate": 2.9162686190187987e-05, "loss": 0.8388, "step": 217380 }, { "epoch": 3.816604926350533, "grad_norm": 0.05024693016577014, "learning_rate": 2.9155018871063e-05, "loss": 0.8361, "step": 217390 }, { "epoch": 3.816780491230534, "grad_norm": 0.07220789898643996, "learning_rate": 2.914735246222767e-05, "loss": 0.8418, "step": 217400 }, { "epoch": 3.8169560561105356, "grad_norm": 0.051325071909766276, "learning_rate": 2.9139686963779156e-05, "loss": 0.8415, "step": 217410 }, { "epoch": 3.817131620990537, "grad_norm": 0.05186060457237809, "learning_rate": 2.9132022375814605e-05, "loss": 0.8342, "step": 217420 }, { "epoch": 3.8173071858705385, "grad_norm": 0.043625592829566746, "learning_rate": 2.912435869843105e-05, "loss": 0.8364, "step": 217430 }, { "epoch": 3.81748275075054, "grad_norm": 0.04603764544549811, "learning_rate": 2.9116695931725703e-05, "loss": 0.8352, "step": 217440 }, { "epoch": 3.817658315630541, "grad_norm": 0.045966051396095185, "learning_rate": 2.910903407579556e-05, "loss": 0.8397, "step": 217450 }, { "epoch": 3.8178338805105425, "grad_norm": 0.06776435742153147, "learning_rate": 2.9101373130737765e-05, "loss": 0.8363, "step": 217460 }, { "epoch": 3.818009445390544, "grad_norm": 0.052196758557891486, "learning_rate": 2.9093713096649323e-05, "loss": 0.838, "step": 217470 }, { "epoch": 3.8181850102705455, "grad_norm": 0.053590190660677404, "learning_rate": 2.9086053973627314e-05, "loss": 0.8405, "step": 217480 }, { "epoch": 3.818360575150547, "grad_norm": 0.05067688265083248, "learning_rate": 2.907839576176881e-05, "loss": 0.836, "step": 217490 }, { "epoch": 3.8185361400305484, "grad_norm": 0.04909734318283264, "learning_rate": 2.9070738461170795e-05, "loss": 0.8378, "step": 217500 }, { "epoch": 3.81871170491055, "grad_norm": 0.05496334567904103, "learning_rate": 2.906308207193031e-05, "loss": 0.832, "step": 217510 }, { "epoch": 3.818887269790551, "grad_norm": 0.05631008877521826, "learning_rate": 2.905542659414439e-05, "loss": 0.8441, "step": 217520 }, { "epoch": 3.8190628346705524, "grad_norm": 0.05678317368765007, "learning_rate": 2.9047772027909936e-05, "loss": 0.8478, "step": 217530 }, { "epoch": 3.819238399550554, "grad_norm": 0.05359000575235598, "learning_rate": 2.9040118373324062e-05, "loss": 0.8348, "step": 217540 }, { "epoch": 3.8194139644305554, "grad_norm": 0.04772043831295848, "learning_rate": 2.9032465630483656e-05, "loss": 0.8407, "step": 217550 }, { "epoch": 3.819589529310557, "grad_norm": 0.06696473542791347, "learning_rate": 2.9024813799485724e-05, "loss": 0.8367, "step": 217560 }, { "epoch": 3.819765094190558, "grad_norm": 0.06514849206232294, "learning_rate": 2.9017162880427167e-05, "loss": 0.8342, "step": 217570 }, { "epoch": 3.81994065907056, "grad_norm": 0.06125027259968786, "learning_rate": 2.900951287340496e-05, "loss": 0.8377, "step": 217580 }, { "epoch": 3.820116223950561, "grad_norm": 0.05395400304987546, "learning_rate": 2.9001863778516035e-05, "loss": 0.8376, "step": 217590 }, { "epoch": 3.8202917888305623, "grad_norm": 0.04386858689623785, "learning_rate": 2.8994215595857255e-05, "loss": 0.8435, "step": 217600 }, { "epoch": 3.820467353710564, "grad_norm": 0.052919265230094485, "learning_rate": 2.8986568325525614e-05, "loss": 0.8379, "step": 217610 }, { "epoch": 3.8206429185905653, "grad_norm": 0.0469909856580591, "learning_rate": 2.8978921967617922e-05, "loss": 0.8359, "step": 217620 }, { "epoch": 3.8208184834705667, "grad_norm": 0.05796953206880837, "learning_rate": 2.8971276522231122e-05, "loss": 0.8328, "step": 217630 }, { "epoch": 3.8209940483505678, "grad_norm": 0.07386189347663745, "learning_rate": 2.8963631989462025e-05, "loss": 0.8367, "step": 217640 }, { "epoch": 3.8211696132305693, "grad_norm": 0.05077625535813406, "learning_rate": 2.8955988369407517e-05, "loss": 0.836, "step": 217650 }, { "epoch": 3.8213451781105707, "grad_norm": 0.06433096663627376, "learning_rate": 2.894834566216446e-05, "loss": 0.8257, "step": 217660 }, { "epoch": 3.821520742990572, "grad_norm": 0.08254608903882428, "learning_rate": 2.8940703867829656e-05, "loss": 0.8417, "step": 217670 }, { "epoch": 3.8216963078705737, "grad_norm": 0.054692110726047245, "learning_rate": 2.8933062986499943e-05, "loss": 0.8394, "step": 217680 }, { "epoch": 3.8218718727505747, "grad_norm": 0.0421971690743456, "learning_rate": 2.8925423018272163e-05, "loss": 0.843, "step": 217690 }, { "epoch": 3.8220474376305766, "grad_norm": 0.06618326744882382, "learning_rate": 2.8917783963243028e-05, "loss": 0.8419, "step": 217700 }, { "epoch": 3.8222230025105777, "grad_norm": 0.061053564484274646, "learning_rate": 2.8910145821509445e-05, "loss": 0.8437, "step": 217710 }, { "epoch": 3.822398567390579, "grad_norm": 0.05727020617273264, "learning_rate": 2.890250859316809e-05, "loss": 0.8406, "step": 217720 }, { "epoch": 3.8225741322705806, "grad_norm": 0.06864105600709133, "learning_rate": 2.889487227831581e-05, "loss": 0.8365, "step": 217730 }, { "epoch": 3.822749697150582, "grad_norm": 0.06559234990720195, "learning_rate": 2.8887236877049283e-05, "loss": 0.8344, "step": 217740 }, { "epoch": 3.8229252620305836, "grad_norm": 0.051088176523832865, "learning_rate": 2.8879602389465277e-05, "loss": 0.8417, "step": 217750 }, { "epoch": 3.8231008269105846, "grad_norm": 0.05469541270208154, "learning_rate": 2.887196881566058e-05, "loss": 0.8424, "step": 217760 }, { "epoch": 3.823276391790586, "grad_norm": 0.04819606655624512, "learning_rate": 2.886433615573178e-05, "loss": 0.8348, "step": 217770 }, { "epoch": 3.8234519566705876, "grad_norm": 0.06066112448470286, "learning_rate": 2.8856704409775748e-05, "loss": 0.8403, "step": 217780 }, { "epoch": 3.823627521550589, "grad_norm": 0.0578070037322324, "learning_rate": 2.8849073577889046e-05, "loss": 0.8433, "step": 217790 }, { "epoch": 3.8238030864305905, "grad_norm": 0.04021456416061309, "learning_rate": 2.8841443660168455e-05, "loss": 0.8406, "step": 217800 }, { "epoch": 3.8239786513105916, "grad_norm": 0.04689269823392733, "learning_rate": 2.8833814656710566e-05, "loss": 0.8358, "step": 217810 }, { "epoch": 3.8241542161905935, "grad_norm": 0.059586372589268, "learning_rate": 2.882618656761207e-05, "loss": 0.8341, "step": 217820 }, { "epoch": 3.8243297810705945, "grad_norm": 0.05048393301332705, "learning_rate": 2.8818559392969664e-05, "loss": 0.8454, "step": 217830 }, { "epoch": 3.824505345950596, "grad_norm": 0.05515371634432426, "learning_rate": 2.8810933132879902e-05, "loss": 0.8351, "step": 217840 }, { "epoch": 3.8246809108305975, "grad_norm": 0.037974768694121405, "learning_rate": 2.8803307787439455e-05, "loss": 0.8437, "step": 217850 }, { "epoch": 3.824856475710599, "grad_norm": 0.07384821010002073, "learning_rate": 2.8795683356744968e-05, "loss": 0.8438, "step": 217860 }, { "epoch": 3.8250320405906004, "grad_norm": 0.045116627362539664, "learning_rate": 2.8788059840892955e-05, "loss": 0.8486, "step": 217870 }, { "epoch": 3.8252076054706015, "grad_norm": 0.05137816335365439, "learning_rate": 2.8780437239980112e-05, "loss": 0.8371, "step": 217880 }, { "epoch": 3.825383170350603, "grad_norm": 0.0635493055194883, "learning_rate": 2.877281555410295e-05, "loss": 0.8353, "step": 217890 }, { "epoch": 3.8255587352306044, "grad_norm": 0.0814291578961263, "learning_rate": 2.876519478335809e-05, "loss": 0.845, "step": 217900 }, { "epoch": 3.825734300110606, "grad_norm": 0.04915275085982769, "learning_rate": 2.8757574927842014e-05, "loss": 0.8354, "step": 217910 }, { "epoch": 3.8259098649906074, "grad_norm": 0.05672688110338907, "learning_rate": 2.8749955987651317e-05, "loss": 0.8392, "step": 217920 }, { "epoch": 3.826085429870609, "grad_norm": 0.04610049327477415, "learning_rate": 2.8742337962882547e-05, "loss": 0.8346, "step": 217930 }, { "epoch": 3.8262609947506103, "grad_norm": 0.06777879873542218, "learning_rate": 2.8734720853632153e-05, "loss": 0.8339, "step": 217940 }, { "epoch": 3.8264365596306114, "grad_norm": 0.04391237411164208, "learning_rate": 2.8727104659996752e-05, "loss": 0.8426, "step": 217950 }, { "epoch": 3.826612124510613, "grad_norm": 0.058975691442167966, "learning_rate": 2.8719489382072746e-05, "loss": 0.836, "step": 217960 }, { "epoch": 3.8267876893906143, "grad_norm": 0.04931930086000837, "learning_rate": 2.8711875019956706e-05, "loss": 0.8398, "step": 217970 }, { "epoch": 3.826963254270616, "grad_norm": 0.05741677401108841, "learning_rate": 2.870426157374503e-05, "loss": 0.8413, "step": 217980 }, { "epoch": 3.8271388191506173, "grad_norm": 0.07181429712060745, "learning_rate": 2.8696649043534213e-05, "loss": 0.8315, "step": 217990 }, { "epoch": 3.8273143840306183, "grad_norm": 0.09330259707602125, "learning_rate": 2.8689037429420734e-05, "loss": 0.8388, "step": 218000 }, { "epoch": 3.8274899489106198, "grad_norm": 0.052211788444250014, "learning_rate": 2.8681426731500988e-05, "loss": 0.8395, "step": 218010 }, { "epoch": 3.8276655137906213, "grad_norm": 0.04315322754962974, "learning_rate": 2.8673816949871424e-05, "loss": 0.843, "step": 218020 }, { "epoch": 3.8278410786706227, "grad_norm": 0.05160784568054489, "learning_rate": 2.8666208084628477e-05, "loss": 0.8352, "step": 218030 }, { "epoch": 3.828016643550624, "grad_norm": 0.055811994579850824, "learning_rate": 2.865860013586849e-05, "loss": 0.8387, "step": 218040 }, { "epoch": 3.8281922084306257, "grad_norm": 0.04513846018871603, "learning_rate": 2.8650993103687968e-05, "loss": 0.843, "step": 218050 }, { "epoch": 3.828367773310627, "grad_norm": 0.0440722421388137, "learning_rate": 2.8643386988183184e-05, "loss": 0.8353, "step": 218060 }, { "epoch": 3.828543338190628, "grad_norm": 0.057086227456068915, "learning_rate": 2.86357817894506e-05, "loss": 0.8403, "step": 218070 }, { "epoch": 3.8287189030706297, "grad_norm": 0.05456573226152338, "learning_rate": 2.8628177507586484e-05, "loss": 0.8372, "step": 218080 }, { "epoch": 3.828894467950631, "grad_norm": 0.06429449434450157, "learning_rate": 2.862057414268724e-05, "loss": 0.8335, "step": 218090 }, { "epoch": 3.8290700328306326, "grad_norm": 0.049037838027752766, "learning_rate": 2.861297169484921e-05, "loss": 0.8374, "step": 218100 }, { "epoch": 3.829245597710634, "grad_norm": 0.055989380561605795, "learning_rate": 2.860537016416866e-05, "loss": 0.8449, "step": 218110 }, { "epoch": 3.829421162590635, "grad_norm": 0.047041374913952515, "learning_rate": 2.8597769550741992e-05, "loss": 0.8404, "step": 218120 }, { "epoch": 3.8295967274706366, "grad_norm": 0.055331828870274186, "learning_rate": 2.8590169854665434e-05, "loss": 0.8384, "step": 218130 }, { "epoch": 3.829772292350638, "grad_norm": 0.060853224020179335, "learning_rate": 2.8582571076035326e-05, "loss": 0.8401, "step": 218140 }, { "epoch": 3.8299478572306396, "grad_norm": 0.04805027525511307, "learning_rate": 2.8574973214947905e-05, "loss": 0.8337, "step": 218150 }, { "epoch": 3.830123422110641, "grad_norm": 0.04925848259285232, "learning_rate": 2.8567376271499435e-05, "loss": 0.8349, "step": 218160 }, { "epoch": 3.8302989869906425, "grad_norm": 0.08251862772793578, "learning_rate": 2.8559780245786233e-05, "loss": 0.8434, "step": 218170 }, { "epoch": 3.830474551870644, "grad_norm": 0.06677559085774283, "learning_rate": 2.8552185137904458e-05, "loss": 0.839, "step": 218180 }, { "epoch": 3.830650116750645, "grad_norm": 0.049540208311358996, "learning_rate": 2.8544590947950394e-05, "loss": 0.8375, "step": 218190 }, { "epoch": 3.8308256816306465, "grad_norm": 0.050911789926577536, "learning_rate": 2.8536997676020265e-05, "loss": 0.8502, "step": 218200 }, { "epoch": 3.831001246510648, "grad_norm": 0.05945922816175219, "learning_rate": 2.8529405322210214e-05, "loss": 0.8385, "step": 218210 }, { "epoch": 3.8311768113906495, "grad_norm": 0.062153917777248636, "learning_rate": 2.852181388661654e-05, "loss": 0.8379, "step": 218220 }, { "epoch": 3.831352376270651, "grad_norm": 0.0744492683464073, "learning_rate": 2.8514223369335345e-05, "loss": 0.8473, "step": 218230 }, { "epoch": 3.831527941150652, "grad_norm": 0.05471730002340675, "learning_rate": 2.850663377046286e-05, "loss": 0.8415, "step": 218240 }, { "epoch": 3.831703506030654, "grad_norm": 0.06192529510611837, "learning_rate": 2.84990450900952e-05, "loss": 0.8447, "step": 218250 }, { "epoch": 3.831879070910655, "grad_norm": 0.05653469580190741, "learning_rate": 2.8491457328328526e-05, "loss": 0.8392, "step": 218260 }, { "epoch": 3.8320546357906564, "grad_norm": 0.06047528120682877, "learning_rate": 2.8483870485259007e-05, "loss": 0.8403, "step": 218270 }, { "epoch": 3.832230200670658, "grad_norm": 0.05087370914830749, "learning_rate": 2.847628456098269e-05, "loss": 0.8424, "step": 218280 }, { "epoch": 3.8324057655506594, "grad_norm": 0.04687244494809302, "learning_rate": 2.8468699555595813e-05, "loss": 0.8399, "step": 218290 }, { "epoch": 3.832581330430661, "grad_norm": 0.05535476382325871, "learning_rate": 2.8461115469194384e-05, "loss": 0.8362, "step": 218300 }, { "epoch": 3.832756895310662, "grad_norm": 0.04647643017570771, "learning_rate": 2.8453532301874542e-05, "loss": 0.831, "step": 218310 }, { "epoch": 3.8329324601906634, "grad_norm": 0.05094426988029317, "learning_rate": 2.844595005373233e-05, "loss": 0.8467, "step": 218320 }, { "epoch": 3.833108025070665, "grad_norm": 0.07458383075135919, "learning_rate": 2.8438368724863835e-05, "loss": 0.8496, "step": 218330 }, { "epoch": 3.8332835899506663, "grad_norm": 0.04755210771512372, "learning_rate": 2.8430788315365148e-05, "loss": 0.8427, "step": 218340 }, { "epoch": 3.833459154830668, "grad_norm": 0.05598820424806286, "learning_rate": 2.8423208825332242e-05, "loss": 0.8354, "step": 218350 }, { "epoch": 3.833634719710669, "grad_norm": 0.047705029492315754, "learning_rate": 2.841563025486119e-05, "loss": 0.8376, "step": 218360 }, { "epoch": 3.8338102845906707, "grad_norm": 0.05452524272249354, "learning_rate": 2.8408052604048047e-05, "loss": 0.8353, "step": 218370 }, { "epoch": 3.833985849470672, "grad_norm": 0.06595402603784706, "learning_rate": 2.840047587298873e-05, "loss": 0.8452, "step": 218380 }, { "epoch": 3.8341614143506733, "grad_norm": 0.05029788778667228, "learning_rate": 2.8392900061779358e-05, "loss": 0.8404, "step": 218390 }, { "epoch": 3.8343369792306747, "grad_norm": 0.061210028038722185, "learning_rate": 2.8385325170515826e-05, "loss": 0.8328, "step": 218400 }, { "epoch": 3.834512544110676, "grad_norm": 0.0511658572771602, "learning_rate": 2.837775119929416e-05, "loss": 0.836, "step": 218410 }, { "epoch": 3.8346881089906777, "grad_norm": 0.05447957013563409, "learning_rate": 2.8370178148210293e-05, "loss": 0.8342, "step": 218420 }, { "epoch": 3.8348636738706787, "grad_norm": 0.051541719534348496, "learning_rate": 2.8362606017360176e-05, "loss": 0.8419, "step": 218430 }, { "epoch": 3.83503923875068, "grad_norm": 0.05284625414103271, "learning_rate": 2.8355034806839792e-05, "loss": 0.8363, "step": 218440 }, { "epoch": 3.8352148036306817, "grad_norm": 0.059232496685376355, "learning_rate": 2.8347464516744986e-05, "loss": 0.843, "step": 218450 }, { "epoch": 3.835390368510683, "grad_norm": 0.043039978245789305, "learning_rate": 2.8339895147171784e-05, "loss": 0.84, "step": 218460 }, { "epoch": 3.8355659333906846, "grad_norm": 0.06845129705140372, "learning_rate": 2.8332326698216e-05, "loss": 0.8431, "step": 218470 }, { "epoch": 3.8357414982706857, "grad_norm": 0.055852932708701876, "learning_rate": 2.832475916997361e-05, "loss": 0.8304, "step": 218480 }, { "epoch": 3.8359170631506876, "grad_norm": 0.04750815592257536, "learning_rate": 2.8317192562540407e-05, "loss": 0.8362, "step": 218490 }, { "epoch": 3.8360926280306886, "grad_norm": 0.05317315294702896, "learning_rate": 2.8309626876012308e-05, "loss": 0.8381, "step": 218500 }, { "epoch": 3.83626819291069, "grad_norm": 0.09352393991311736, "learning_rate": 2.8302062110485196e-05, "loss": 0.8388, "step": 218510 }, { "epoch": 3.8364437577906916, "grad_norm": 0.043923905637185415, "learning_rate": 2.8294498266054857e-05, "loss": 0.8372, "step": 218520 }, { "epoch": 3.836619322670693, "grad_norm": 0.05355854751146365, "learning_rate": 2.8286935342817155e-05, "loss": 0.8431, "step": 218530 }, { "epoch": 3.8367948875506945, "grad_norm": 0.051689072993707494, "learning_rate": 2.8279373340867954e-05, "loss": 0.847, "step": 218540 }, { "epoch": 3.8369704524306956, "grad_norm": 0.0485079503679546, "learning_rate": 2.8271812260302998e-05, "loss": 0.8387, "step": 218550 }, { "epoch": 3.837146017310697, "grad_norm": 0.050165946133104024, "learning_rate": 2.8264252101218125e-05, "loss": 0.8448, "step": 218560 }, { "epoch": 3.8373215821906985, "grad_norm": 0.049819104218074506, "learning_rate": 2.825669286370911e-05, "loss": 0.8416, "step": 218570 }, { "epoch": 3.8374971470707, "grad_norm": 0.05945672059659792, "learning_rate": 2.824913454787178e-05, "loss": 0.8304, "step": 218580 }, { "epoch": 3.8376727119507015, "grad_norm": 0.0633014695748565, "learning_rate": 2.824157715380181e-05, "loss": 0.8367, "step": 218590 }, { "epoch": 3.8378482768307025, "grad_norm": 0.0582198727770911, "learning_rate": 2.8234020681595015e-05, "loss": 0.8288, "step": 218600 }, { "epoch": 3.8380238417107044, "grad_norm": 0.05437772782170409, "learning_rate": 2.8226465131347152e-05, "loss": 0.8469, "step": 218610 }, { "epoch": 3.8381994065907055, "grad_norm": 0.05166921122087655, "learning_rate": 2.821891050315385e-05, "loss": 0.8384, "step": 218620 }, { "epoch": 3.838374971470707, "grad_norm": 0.052751713991498826, "learning_rate": 2.8211356797110982e-05, "loss": 0.8432, "step": 218630 }, { "epoch": 3.8385505363507084, "grad_norm": 0.05840720095844199, "learning_rate": 2.820380401331412e-05, "loss": 0.851, "step": 218640 }, { "epoch": 3.83872610123071, "grad_norm": 0.05596873595176756, "learning_rate": 2.8196252151859054e-05, "loss": 0.8344, "step": 218650 }, { "epoch": 3.8389016661107114, "grad_norm": 0.04513060176858437, "learning_rate": 2.8188701212841395e-05, "loss": 0.8401, "step": 218660 }, { "epoch": 3.8390772309907124, "grad_norm": 0.044955824974307376, "learning_rate": 2.8181151196356853e-05, "loss": 0.85, "step": 218670 }, { "epoch": 3.839252795870714, "grad_norm": 0.06490679519715688, "learning_rate": 2.81736021025011e-05, "loss": 0.8427, "step": 218680 }, { "epoch": 3.8394283607507154, "grad_norm": 0.04644190358315839, "learning_rate": 2.816605393136974e-05, "loss": 0.8383, "step": 218690 }, { "epoch": 3.839603925630717, "grad_norm": 0.07216746717606763, "learning_rate": 2.815850668305844e-05, "loss": 0.8419, "step": 218700 }, { "epoch": 3.8397794905107183, "grad_norm": 0.05102942508802779, "learning_rate": 2.815096035766285e-05, "loss": 0.8394, "step": 218710 }, { "epoch": 3.83995505539072, "grad_norm": 0.05705069223036557, "learning_rate": 2.8143414955278514e-05, "loss": 0.8358, "step": 218720 }, { "epoch": 3.8401306202707213, "grad_norm": 0.05118189626464809, "learning_rate": 2.813587047600107e-05, "loss": 0.8384, "step": 218730 }, { "epoch": 3.8403061851507223, "grad_norm": 0.043962997021085304, "learning_rate": 2.812832691992613e-05, "loss": 0.8368, "step": 218740 }, { "epoch": 3.840481750030724, "grad_norm": 0.045758617622988274, "learning_rate": 2.8120784287149278e-05, "loss": 0.8411, "step": 218750 }, { "epoch": 3.8406573149107253, "grad_norm": 0.05811093720257758, "learning_rate": 2.8113242577766014e-05, "loss": 0.8457, "step": 218760 }, { "epoch": 3.8408328797907267, "grad_norm": 0.06408202706290622, "learning_rate": 2.8105701791871946e-05, "loss": 0.8442, "step": 218770 }, { "epoch": 3.841008444670728, "grad_norm": 0.05604478048462434, "learning_rate": 2.8098161929562628e-05, "loss": 0.8385, "step": 218780 }, { "epoch": 3.8411840095507293, "grad_norm": 0.04500563980143818, "learning_rate": 2.8090622990933523e-05, "loss": 0.843, "step": 218790 }, { "epoch": 3.8413595744307307, "grad_norm": 0.06166870751693459, "learning_rate": 2.808308497608024e-05, "loss": 0.8377, "step": 218800 }, { "epoch": 3.841535139310732, "grad_norm": 0.05785331788004471, "learning_rate": 2.807554788509823e-05, "loss": 0.842, "step": 218810 }, { "epoch": 3.8417107041907337, "grad_norm": 0.06194902497771558, "learning_rate": 2.8068011718083036e-05, "loss": 0.835, "step": 218820 }, { "epoch": 3.841886269070735, "grad_norm": 0.05585043216398764, "learning_rate": 2.806047647513007e-05, "loss": 0.84, "step": 218830 }, { "epoch": 3.8420618339507366, "grad_norm": 0.046718488503334155, "learning_rate": 2.8052942156334854e-05, "loss": 0.8399, "step": 218840 }, { "epoch": 3.842237398830738, "grad_norm": 0.047197391820726284, "learning_rate": 2.8045408761792863e-05, "loss": 0.832, "step": 218850 }, { "epoch": 3.842412963710739, "grad_norm": 0.04658474858146443, "learning_rate": 2.80378762915995e-05, "loss": 0.8488, "step": 218860 }, { "epoch": 3.8425885285907406, "grad_norm": 0.041758361819475835, "learning_rate": 2.803034474585023e-05, "loss": 0.838, "step": 218870 }, { "epoch": 3.842764093470742, "grad_norm": 0.06277328586329195, "learning_rate": 2.8022814124640502e-05, "loss": 0.8394, "step": 218880 }, { "epoch": 3.8429396583507436, "grad_norm": 0.06499075806884659, "learning_rate": 2.801528442806567e-05, "loss": 0.8384, "step": 218890 }, { "epoch": 3.843115223230745, "grad_norm": 0.04786474407327228, "learning_rate": 2.8007755656221178e-05, "loss": 0.8462, "step": 218900 }, { "epoch": 3.843290788110746, "grad_norm": 0.0442158492408872, "learning_rate": 2.8000227809202417e-05, "loss": 0.8411, "step": 218910 }, { "epoch": 3.8434663529907476, "grad_norm": 0.059701992922136585, "learning_rate": 2.7992700887104782e-05, "loss": 0.8451, "step": 218920 }, { "epoch": 3.843641917870749, "grad_norm": 0.048779777010636946, "learning_rate": 2.798517489002359e-05, "loss": 0.8432, "step": 218930 }, { "epoch": 3.8438174827507505, "grad_norm": 0.04363513962422197, "learning_rate": 2.797764981805423e-05, "loss": 0.8348, "step": 218940 }, { "epoch": 3.843993047630752, "grad_norm": 0.050645610861058336, "learning_rate": 2.7970125671292063e-05, "loss": 0.8348, "step": 218950 }, { "epoch": 3.8441686125107535, "grad_norm": 0.04838799353295888, "learning_rate": 2.796260244983234e-05, "loss": 0.833, "step": 218960 }, { "epoch": 3.844344177390755, "grad_norm": 0.06237712831766086, "learning_rate": 2.79550801537705e-05, "loss": 0.8422, "step": 218970 }, { "epoch": 3.844519742270756, "grad_norm": 0.04255864791706977, "learning_rate": 2.7947558783201776e-05, "loss": 0.85, "step": 218980 }, { "epoch": 3.8446953071507575, "grad_norm": 0.06966975190174646, "learning_rate": 2.7940038338221503e-05, "loss": 0.8475, "step": 218990 }, { "epoch": 3.844870872030759, "grad_norm": 0.05713200132591878, "learning_rate": 2.7932518818924924e-05, "loss": 0.8309, "step": 219000 }, { "epoch": 3.8450464369107604, "grad_norm": 0.05060929306082336, "learning_rate": 2.7925000225407327e-05, "loss": 0.8362, "step": 219010 }, { "epoch": 3.845222001790762, "grad_norm": 0.04827013355956168, "learning_rate": 2.791748255776402e-05, "loss": 0.842, "step": 219020 }, { "epoch": 3.845397566670763, "grad_norm": 0.05710141796963532, "learning_rate": 2.790996581609018e-05, "loss": 0.8435, "step": 219030 }, { "epoch": 3.845573131550765, "grad_norm": 0.07920795934782872, "learning_rate": 2.790245000048108e-05, "loss": 0.84, "step": 219040 }, { "epoch": 3.845748696430766, "grad_norm": 0.055841374294081134, "learning_rate": 2.789493511103197e-05, "loss": 0.8349, "step": 219050 }, { "epoch": 3.8459242613107674, "grad_norm": 0.07859509973885805, "learning_rate": 2.7887421147838018e-05, "loss": 0.84, "step": 219060 }, { "epoch": 3.846099826190769, "grad_norm": 0.06759180872247454, "learning_rate": 2.7879908110994458e-05, "loss": 0.8374, "step": 219070 }, { "epoch": 3.8462753910707703, "grad_norm": 0.04704867044123688, "learning_rate": 2.7872396000596474e-05, "loss": 0.8368, "step": 219080 }, { "epoch": 3.846450955950772, "grad_norm": 0.07737852523372644, "learning_rate": 2.786488481673928e-05, "loss": 0.8337, "step": 219090 }, { "epoch": 3.846626520830773, "grad_norm": 0.058699116156820864, "learning_rate": 2.785737455951798e-05, "loss": 0.8391, "step": 219100 }, { "epoch": 3.8468020857107743, "grad_norm": 0.05087541050617973, "learning_rate": 2.7849865229027748e-05, "loss": 0.829, "step": 219110 }, { "epoch": 3.846977650590776, "grad_norm": 0.047261070885180556, "learning_rate": 2.7842356825363787e-05, "loss": 0.8369, "step": 219120 }, { "epoch": 3.8471532154707773, "grad_norm": 0.047084120701732055, "learning_rate": 2.7834849348621123e-05, "loss": 0.8472, "step": 219130 }, { "epoch": 3.8473287803507787, "grad_norm": 0.06734470497782259, "learning_rate": 2.7827342798895006e-05, "loss": 0.8403, "step": 219140 }, { "epoch": 3.8475043452307798, "grad_norm": 0.03862217525995662, "learning_rate": 2.7819837176280433e-05, "loss": 0.8402, "step": 219150 }, { "epoch": 3.8476799101107817, "grad_norm": 0.051618892189436126, "learning_rate": 2.7812332480872587e-05, "loss": 0.8424, "step": 219160 }, { "epoch": 3.8478554749907827, "grad_norm": 0.04737372027883426, "learning_rate": 2.7804828712766494e-05, "loss": 0.8364, "step": 219170 }, { "epoch": 3.848031039870784, "grad_norm": 0.04815093514194723, "learning_rate": 2.779732587205724e-05, "loss": 0.845, "step": 219180 }, { "epoch": 3.8482066047507857, "grad_norm": 0.05293595872308211, "learning_rate": 2.7789823958839927e-05, "loss": 0.8383, "step": 219190 }, { "epoch": 3.848382169630787, "grad_norm": 0.053440074841171525, "learning_rate": 2.778232297320954e-05, "loss": 0.8403, "step": 219200 }, { "epoch": 3.8485577345107886, "grad_norm": 0.04752215232546679, "learning_rate": 2.777482291526116e-05, "loss": 0.841, "step": 219210 }, { "epoch": 3.8487332993907897, "grad_norm": 0.0513170924959201, "learning_rate": 2.776732378508983e-05, "loss": 0.8323, "step": 219220 }, { "epoch": 3.848908864270791, "grad_norm": 0.05520270351482909, "learning_rate": 2.7759825582790523e-05, "loss": 0.8495, "step": 219230 }, { "epoch": 3.8490844291507926, "grad_norm": 0.04720325976061258, "learning_rate": 2.7752328308458254e-05, "loss": 0.8334, "step": 219240 }, { "epoch": 3.849259994030794, "grad_norm": 0.042642514744637, "learning_rate": 2.774483196218803e-05, "loss": 0.834, "step": 219250 }, { "epoch": 3.8494355589107956, "grad_norm": 0.043755223113268384, "learning_rate": 2.7737336544074854e-05, "loss": 0.8396, "step": 219260 }, { "epoch": 3.8496111237907966, "grad_norm": 0.05370640005363481, "learning_rate": 2.7729842054213627e-05, "loss": 0.8417, "step": 219270 }, { "epoch": 3.8497866886707985, "grad_norm": 0.05756470015149237, "learning_rate": 2.7722348492699345e-05, "loss": 0.8435, "step": 219280 }, { "epoch": 3.8499622535507996, "grad_norm": 0.046676551854337646, "learning_rate": 2.7714855859626987e-05, "loss": 0.8401, "step": 219290 }, { "epoch": 3.850137818430801, "grad_norm": 0.048272264890516066, "learning_rate": 2.7707364155091394e-05, "loss": 0.8329, "step": 219300 }, { "epoch": 3.8503133833108025, "grad_norm": 0.06607993492842663, "learning_rate": 2.7699873379187592e-05, "loss": 0.8368, "step": 219310 }, { "epoch": 3.850488948190804, "grad_norm": 0.053986448515217576, "learning_rate": 2.7692383532010413e-05, "loss": 0.8364, "step": 219320 }, { "epoch": 3.8506645130708055, "grad_norm": 0.05422751882106154, "learning_rate": 2.768489461365482e-05, "loss": 0.8395, "step": 219330 }, { "epoch": 3.8508400779508065, "grad_norm": 0.04697555290739746, "learning_rate": 2.7677406624215635e-05, "loss": 0.8353, "step": 219340 }, { "epoch": 3.851015642830808, "grad_norm": 0.055276727725604595, "learning_rate": 2.7669919563787755e-05, "loss": 0.8402, "step": 219350 }, { "epoch": 3.8511912077108095, "grad_norm": 0.04796310299523721, "learning_rate": 2.7662433432466073e-05, "loss": 0.8404, "step": 219360 }, { "epoch": 3.851366772590811, "grad_norm": 0.0648017285478401, "learning_rate": 2.7654948230345395e-05, "loss": 0.8396, "step": 219370 }, { "epoch": 3.8515423374708124, "grad_norm": 0.08281337946208898, "learning_rate": 2.7647463957520577e-05, "loss": 0.8373, "step": 219380 }, { "epoch": 3.851717902350814, "grad_norm": 0.04601857719279901, "learning_rate": 2.7639980614086474e-05, "loss": 0.8412, "step": 219390 }, { "epoch": 3.8518934672308154, "grad_norm": 0.06140057852682502, "learning_rate": 2.763249820013785e-05, "loss": 0.8376, "step": 219400 }, { "epoch": 3.8520690321108164, "grad_norm": 0.05154354951646932, "learning_rate": 2.762501671576953e-05, "loss": 0.8442, "step": 219410 }, { "epoch": 3.852244596990818, "grad_norm": 0.054919854762860286, "learning_rate": 2.761753616107631e-05, "loss": 0.8444, "step": 219420 }, { "epoch": 3.8524201618708194, "grad_norm": 0.1315625722015553, "learning_rate": 2.7610056536153006e-05, "loss": 0.8395, "step": 219430 }, { "epoch": 3.852595726750821, "grad_norm": 0.04711718851976625, "learning_rate": 2.7602577841094317e-05, "loss": 0.8452, "step": 219440 }, { "epoch": 3.8527712916308223, "grad_norm": 0.06178117331809661, "learning_rate": 2.7595100075995047e-05, "loss": 0.8328, "step": 219450 }, { "epoch": 3.8529468565108234, "grad_norm": 0.04838934072706186, "learning_rate": 2.7587623240949946e-05, "loss": 0.8398, "step": 219460 }, { "epoch": 3.853122421390825, "grad_norm": 0.06263911156102961, "learning_rate": 2.7580147336053656e-05, "loss": 0.8414, "step": 219470 }, { "epoch": 3.8532979862708263, "grad_norm": 0.04385880024959037, "learning_rate": 2.757267236140104e-05, "loss": 0.8329, "step": 219480 }, { "epoch": 3.853473551150828, "grad_norm": 0.04807344946702375, "learning_rate": 2.756519831708671e-05, "loss": 0.8384, "step": 219490 }, { "epoch": 3.8536491160308293, "grad_norm": 0.05520861849586192, "learning_rate": 2.755772520320543e-05, "loss": 0.8449, "step": 219500 }, { "epoch": 3.8538246809108307, "grad_norm": 0.04594010517425029, "learning_rate": 2.7550253019851807e-05, "loss": 0.8398, "step": 219510 }, { "epoch": 3.8540002457908322, "grad_norm": 0.04444677894261225, "learning_rate": 2.754278176712055e-05, "loss": 0.8451, "step": 219520 }, { "epoch": 3.8541758106708333, "grad_norm": 0.06401653575513012, "learning_rate": 2.753531144510636e-05, "loss": 0.8419, "step": 219530 }, { "epoch": 3.8543513755508347, "grad_norm": 0.05587957322349888, "learning_rate": 2.752784205390383e-05, "loss": 0.8381, "step": 219540 }, { "epoch": 3.854526940430836, "grad_norm": 0.04691480143224526, "learning_rate": 2.7520373593607613e-05, "loss": 0.8379, "step": 219550 }, { "epoch": 3.8547025053108377, "grad_norm": 0.057707112635505, "learning_rate": 2.7512906064312378e-05, "loss": 0.8431, "step": 219560 }, { "epoch": 3.854878070190839, "grad_norm": 0.06142877287486154, "learning_rate": 2.7505439466112676e-05, "loss": 0.841, "step": 219570 }, { "epoch": 3.85505363507084, "grad_norm": 0.04622621599027552, "learning_rate": 2.7497973799103144e-05, "loss": 0.8409, "step": 219580 }, { "epoch": 3.8552291999508417, "grad_norm": 0.044986058361318214, "learning_rate": 2.749050906337837e-05, "loss": 0.8409, "step": 219590 }, { "epoch": 3.855404764830843, "grad_norm": 0.07789103256657255, "learning_rate": 2.7483045259032962e-05, "loss": 0.839, "step": 219600 }, { "epoch": 3.8555803297108446, "grad_norm": 0.04304814923066816, "learning_rate": 2.747558238616142e-05, "loss": 0.8426, "step": 219610 }, { "epoch": 3.855755894590846, "grad_norm": 0.056027129552569076, "learning_rate": 2.746812044485836e-05, "loss": 0.8346, "step": 219620 }, { "epoch": 3.8559314594708476, "grad_norm": 0.04877868982638914, "learning_rate": 2.7460659435218323e-05, "loss": 0.8412, "step": 219630 }, { "epoch": 3.856107024350849, "grad_norm": 0.05131186744240273, "learning_rate": 2.7453199357335774e-05, "loss": 0.8384, "step": 219640 }, { "epoch": 3.85628258923085, "grad_norm": 0.05050551424641952, "learning_rate": 2.744574021130533e-05, "loss": 0.8322, "step": 219650 }, { "epoch": 3.8564581541108516, "grad_norm": 0.052580156217489564, "learning_rate": 2.7438281997221443e-05, "loss": 0.8386, "step": 219660 }, { "epoch": 3.856633718990853, "grad_norm": 0.0516205556855635, "learning_rate": 2.7430824715178632e-05, "loss": 0.8471, "step": 219670 }, { "epoch": 3.8568092838708545, "grad_norm": 0.05913543396641111, "learning_rate": 2.7423368365271356e-05, "loss": 0.8409, "step": 219680 }, { "epoch": 3.856984848750856, "grad_norm": 0.04611177466452878, "learning_rate": 2.7415912947594097e-05, "loss": 0.8436, "step": 219690 }, { "epoch": 3.857160413630857, "grad_norm": 0.05051800936031428, "learning_rate": 2.7408458462241348e-05, "loss": 0.8485, "step": 219700 }, { "epoch": 3.857335978510859, "grad_norm": 0.056106112646638076, "learning_rate": 2.740100490930752e-05, "loss": 0.8322, "step": 219710 }, { "epoch": 3.85751154339086, "grad_norm": 0.058245128026988204, "learning_rate": 2.7393552288887048e-05, "loss": 0.8411, "step": 219720 }, { "epoch": 3.8576871082708615, "grad_norm": 0.0569899594654877, "learning_rate": 2.7386100601074422e-05, "loss": 0.8364, "step": 219730 }, { "epoch": 3.857862673150863, "grad_norm": 0.05074267068500313, "learning_rate": 2.7378649845963968e-05, "loss": 0.8474, "step": 219740 }, { "epoch": 3.8580382380308644, "grad_norm": 0.06001791742419816, "learning_rate": 2.7371200023650135e-05, "loss": 0.8408, "step": 219750 }, { "epoch": 3.858213802910866, "grad_norm": 0.050315371637615076, "learning_rate": 2.7363751134227305e-05, "loss": 0.8413, "step": 219760 }, { "epoch": 3.858389367790867, "grad_norm": 0.04667407663644479, "learning_rate": 2.7356303177789887e-05, "loss": 0.8354, "step": 219770 }, { "epoch": 3.8585649326708684, "grad_norm": 0.04294186151886764, "learning_rate": 2.7348856154432197e-05, "loss": 0.8382, "step": 219780 }, { "epoch": 3.85874049755087, "grad_norm": 0.04814456719860335, "learning_rate": 2.7341410064248614e-05, "loss": 0.8434, "step": 219790 }, { "epoch": 3.8589160624308714, "grad_norm": 0.04391458677720737, "learning_rate": 2.733396490733351e-05, "loss": 0.8375, "step": 219800 }, { "epoch": 3.859091627310873, "grad_norm": 0.05996067588479444, "learning_rate": 2.732652068378113e-05, "loss": 0.837, "step": 219810 }, { "epoch": 3.859267192190874, "grad_norm": 0.06486119562804217, "learning_rate": 2.7319077393685916e-05, "loss": 0.8445, "step": 219820 }, { "epoch": 3.859442757070876, "grad_norm": 0.04909671484502833, "learning_rate": 2.7311635037142073e-05, "loss": 0.8452, "step": 219830 }, { "epoch": 3.859618321950877, "grad_norm": 0.05819495021616859, "learning_rate": 2.730419361424398e-05, "loss": 0.8392, "step": 219840 }, { "epoch": 3.8597938868308783, "grad_norm": 0.05050589900022409, "learning_rate": 2.7296753125085832e-05, "loss": 0.8341, "step": 219850 }, { "epoch": 3.85996945171088, "grad_norm": 0.059585097748732184, "learning_rate": 2.728931356976196e-05, "loss": 0.8364, "step": 219860 }, { "epoch": 3.8601450165908813, "grad_norm": 0.05032041959338376, "learning_rate": 2.7281874948366635e-05, "loss": 0.8419, "step": 219870 }, { "epoch": 3.8603205814708828, "grad_norm": 0.05632569125041839, "learning_rate": 2.7274437260994055e-05, "loss": 0.8451, "step": 219880 }, { "epoch": 3.860496146350884, "grad_norm": 0.04717146926614923, "learning_rate": 2.7267000507738483e-05, "loss": 0.8423, "step": 219890 }, { "epoch": 3.8606717112308853, "grad_norm": 0.07084589564966515, "learning_rate": 2.725956468869418e-05, "loss": 0.8376, "step": 219900 }, { "epoch": 3.8608472761108867, "grad_norm": 0.05450275988637983, "learning_rate": 2.7252129803955283e-05, "loss": 0.8458, "step": 219910 }, { "epoch": 3.861022840990888, "grad_norm": 0.04429101652341201, "learning_rate": 2.7244695853616047e-05, "loss": 0.8406, "step": 219920 }, { "epoch": 3.8611984058708897, "grad_norm": 0.05005283784487549, "learning_rate": 2.723726283777065e-05, "loss": 0.8389, "step": 219930 }, { "epoch": 3.8613739707508907, "grad_norm": 0.05366841644020899, "learning_rate": 2.7229830756513287e-05, "loss": 0.8415, "step": 219940 }, { "epoch": 3.8615495356308926, "grad_norm": 0.05389590765979715, "learning_rate": 2.722239960993808e-05, "loss": 0.8424, "step": 219950 }, { "epoch": 3.8617251005108937, "grad_norm": 0.060807823931956204, "learning_rate": 2.721496939813922e-05, "loss": 0.8371, "step": 219960 }, { "epoch": 3.861900665390895, "grad_norm": 0.06649100002467083, "learning_rate": 2.7207540121210855e-05, "loss": 0.8387, "step": 219970 }, { "epoch": 3.8620762302708966, "grad_norm": 0.06519891249278198, "learning_rate": 2.720011177924705e-05, "loss": 0.8337, "step": 219980 }, { "epoch": 3.862251795150898, "grad_norm": 0.05232684192002305, "learning_rate": 2.719268437234203e-05, "loss": 0.8416, "step": 219990 }, { "epoch": 3.8624273600308996, "grad_norm": 0.05722671260921137, "learning_rate": 2.7185257900589815e-05, "loss": 0.8401, "step": 220000 }, { "epoch": 3.8626029249109006, "grad_norm": 0.053624587032104744, "learning_rate": 2.7177832364084556e-05, "loss": 0.8472, "step": 220010 }, { "epoch": 3.862778489790902, "grad_norm": 0.04881976284620709, "learning_rate": 2.717040776292028e-05, "loss": 0.8389, "step": 220020 }, { "epoch": 3.8629540546709036, "grad_norm": 0.047886141191370496, "learning_rate": 2.716298409719108e-05, "loss": 0.8393, "step": 220030 }, { "epoch": 3.863129619550905, "grad_norm": 0.05457806072062649, "learning_rate": 2.7155561366991056e-05, "loss": 0.8391, "step": 220040 }, { "epoch": 3.8633051844309065, "grad_norm": 0.04846938046021353, "learning_rate": 2.71481395724142e-05, "loss": 0.8368, "step": 220050 }, { "epoch": 3.8634807493109076, "grad_norm": 0.06532820578125792, "learning_rate": 2.7140718713554563e-05, "loss": 0.8357, "step": 220060 }, { "epoch": 3.8636563141909095, "grad_norm": 0.04458342277601709, "learning_rate": 2.7133298790506208e-05, "loss": 0.8447, "step": 220070 }, { "epoch": 3.8638318790709105, "grad_norm": 0.05969064847551637, "learning_rate": 2.712587980336308e-05, "loss": 0.8451, "step": 220080 }, { "epoch": 3.864007443950912, "grad_norm": 0.06443918176955588, "learning_rate": 2.7118461752219214e-05, "loss": 0.838, "step": 220090 }, { "epoch": 3.8641830088309135, "grad_norm": 0.06979883279936493, "learning_rate": 2.7111044637168597e-05, "loss": 0.839, "step": 220100 }, { "epoch": 3.864358573710915, "grad_norm": 0.05685488500775629, "learning_rate": 2.7103628458305237e-05, "loss": 0.8425, "step": 220110 }, { "epoch": 3.8645341385909164, "grad_norm": 0.046975027223178256, "learning_rate": 2.7096213215723038e-05, "loss": 0.8457, "step": 220120 }, { "epoch": 3.8647097034709175, "grad_norm": 0.05302184229414454, "learning_rate": 2.7088798909515967e-05, "loss": 0.8397, "step": 220130 }, { "epoch": 3.864885268350919, "grad_norm": 0.05951799153742412, "learning_rate": 2.7081385539778027e-05, "loss": 0.8345, "step": 220140 }, { "epoch": 3.8650608332309204, "grad_norm": 0.04060953475397635, "learning_rate": 2.7073973106603025e-05, "loss": 0.8469, "step": 220150 }, { "epoch": 3.865236398110922, "grad_norm": 0.05727645565186854, "learning_rate": 2.7066561610085027e-05, "loss": 0.8469, "step": 220160 }, { "epoch": 3.8654119629909234, "grad_norm": 0.06176063818797023, "learning_rate": 2.7059151050317815e-05, "loss": 0.8374, "step": 220170 }, { "epoch": 3.865587527870925, "grad_norm": 0.0673599264729277, "learning_rate": 2.7051741427395372e-05, "loss": 0.848, "step": 220180 }, { "epoch": 3.8657630927509263, "grad_norm": 0.05557940120785957, "learning_rate": 2.7044332741411505e-05, "loss": 0.8293, "step": 220190 }, { "epoch": 3.8659386576309274, "grad_norm": 0.046946939461686585, "learning_rate": 2.7036924992460116e-05, "loss": 0.844, "step": 220200 }, { "epoch": 3.866114222510929, "grad_norm": 0.047504131632685215, "learning_rate": 2.7029518180635087e-05, "loss": 0.8325, "step": 220210 }, { "epoch": 3.8662897873909303, "grad_norm": 0.055738167020618984, "learning_rate": 2.702211230603021e-05, "loss": 0.83, "step": 220220 }, { "epoch": 3.866465352270932, "grad_norm": 0.0497215717921726, "learning_rate": 2.7014707368739354e-05, "loss": 0.8347, "step": 220230 }, { "epoch": 3.8666409171509333, "grad_norm": 0.05931664951213601, "learning_rate": 2.7007303368856367e-05, "loss": 0.8345, "step": 220240 }, { "epoch": 3.8668164820309343, "grad_norm": 0.0484587104498967, "learning_rate": 2.6999900306474984e-05, "loss": 0.8435, "step": 220250 }, { "epoch": 3.866992046910936, "grad_norm": 0.05750181634594695, "learning_rate": 2.6992498181689063e-05, "loss": 0.8395, "step": 220260 }, { "epoch": 3.8671676117909373, "grad_norm": 0.05566113139043902, "learning_rate": 2.698509699459235e-05, "loss": 0.8353, "step": 220270 }, { "epoch": 3.8673431766709387, "grad_norm": 0.06630100486406547, "learning_rate": 2.6977696745278696e-05, "loss": 0.8492, "step": 220280 }, { "epoch": 3.86751874155094, "grad_norm": 0.052657499664860205, "learning_rate": 2.6970297433841775e-05, "loss": 0.8375, "step": 220290 }, { "epoch": 3.8676943064309417, "grad_norm": 0.07131768821310311, "learning_rate": 2.6962899060375363e-05, "loss": 0.8428, "step": 220300 }, { "epoch": 3.867869871310943, "grad_norm": 0.060320645301357646, "learning_rate": 2.695550162497325e-05, "loss": 0.8453, "step": 220310 }, { "epoch": 3.868045436190944, "grad_norm": 0.05187010362532609, "learning_rate": 2.6948105127729052e-05, "loss": 0.8418, "step": 220320 }, { "epoch": 3.8682210010709457, "grad_norm": 0.05962915842061145, "learning_rate": 2.6940709568736626e-05, "loss": 0.8318, "step": 220330 }, { "epoch": 3.868396565950947, "grad_norm": 0.046812808605633194, "learning_rate": 2.693331494808956e-05, "loss": 0.8361, "step": 220340 }, { "epoch": 3.8685721308309486, "grad_norm": 0.05518716822310761, "learning_rate": 2.6925921265881636e-05, "loss": 0.8399, "step": 220350 }, { "epoch": 3.86874769571095, "grad_norm": 0.04851631283382004, "learning_rate": 2.6918528522206437e-05, "loss": 0.8409, "step": 220360 }, { "epoch": 3.868923260590951, "grad_norm": 0.05270176661893151, "learning_rate": 2.6911136717157693e-05, "loss": 0.8351, "step": 220370 }, { "epoch": 3.8690988254709526, "grad_norm": 0.051455604797407885, "learning_rate": 2.6903745850829076e-05, "loss": 0.836, "step": 220380 }, { "epoch": 3.869274390350954, "grad_norm": 0.047157502840221924, "learning_rate": 2.6896355923314155e-05, "loss": 0.8383, "step": 220390 }, { "epoch": 3.8694499552309556, "grad_norm": 0.053858488258449314, "learning_rate": 2.6888966934706617e-05, "loss": 0.8421, "step": 220400 }, { "epoch": 3.869625520110957, "grad_norm": 0.0759310734525289, "learning_rate": 2.6881578885100102e-05, "loss": 0.8347, "step": 220410 }, { "epoch": 3.8698010849909585, "grad_norm": 0.04960780661828362, "learning_rate": 2.6874191774588154e-05, "loss": 0.8366, "step": 220420 }, { "epoch": 3.86997664987096, "grad_norm": 0.05689180925936284, "learning_rate": 2.68668056032644e-05, "loss": 0.8415, "step": 220430 }, { "epoch": 3.870152214750961, "grad_norm": 0.05229229612174201, "learning_rate": 2.6859420371222432e-05, "loss": 0.8459, "step": 220440 }, { "epoch": 3.8703277796309625, "grad_norm": 0.04504153012892235, "learning_rate": 2.6852036078555843e-05, "loss": 0.8313, "step": 220450 }, { "epoch": 3.870503344510964, "grad_norm": 0.04491783278185149, "learning_rate": 2.6844652725358144e-05, "loss": 0.8407, "step": 220460 }, { "epoch": 3.8706789093909655, "grad_norm": 0.04318533483295248, "learning_rate": 2.68372703117229e-05, "loss": 0.8406, "step": 220470 }, { "epoch": 3.870854474270967, "grad_norm": 0.04925671442926208, "learning_rate": 2.682988883774369e-05, "loss": 0.8364, "step": 220480 }, { "epoch": 3.871030039150968, "grad_norm": 0.05438872879104268, "learning_rate": 2.682250830351394e-05, "loss": 0.8425, "step": 220490 }, { "epoch": 3.87120560403097, "grad_norm": 0.05195200259622004, "learning_rate": 2.681512870912728e-05, "loss": 0.8412, "step": 220500 }, { "epoch": 3.871381168910971, "grad_norm": 0.046271007792777964, "learning_rate": 2.6807750054677133e-05, "loss": 0.8337, "step": 220510 }, { "epoch": 3.8715567337909724, "grad_norm": 0.06291780767024757, "learning_rate": 2.680037234025704e-05, "loss": 0.8401, "step": 220520 }, { "epoch": 3.871732298670974, "grad_norm": 0.09308963747220991, "learning_rate": 2.6792995565960417e-05, "loss": 0.8395, "step": 220530 }, { "epoch": 3.8719078635509754, "grad_norm": 0.058544086619930574, "learning_rate": 2.678561973188076e-05, "loss": 0.8385, "step": 220540 }, { "epoch": 3.872083428430977, "grad_norm": 0.061352725322756034, "learning_rate": 2.6778244838111567e-05, "loss": 0.8403, "step": 220550 }, { "epoch": 3.872258993310978, "grad_norm": 0.047951729834129, "learning_rate": 2.67708708847462e-05, "loss": 0.8401, "step": 220560 }, { "epoch": 3.8724345581909794, "grad_norm": 0.06538290401109517, "learning_rate": 2.6763497871878118e-05, "loss": 0.8261, "step": 220570 }, { "epoch": 3.872610123070981, "grad_norm": 0.055707354191190356, "learning_rate": 2.6756125799600777e-05, "loss": 0.8363, "step": 220580 }, { "epoch": 3.8727856879509823, "grad_norm": 0.05206706863538927, "learning_rate": 2.6748754668007538e-05, "loss": 0.8383, "step": 220590 }, { "epoch": 3.872961252830984, "grad_norm": 0.04362964080349281, "learning_rate": 2.6741384477191783e-05, "loss": 0.8388, "step": 220600 }, { "epoch": 3.873136817710985, "grad_norm": 0.05368436319273393, "learning_rate": 2.6734015227246935e-05, "loss": 0.8457, "step": 220610 }, { "epoch": 3.8733123825909868, "grad_norm": 0.05469917198355411, "learning_rate": 2.6726646918266373e-05, "loss": 0.8377, "step": 220620 }, { "epoch": 3.873487947470988, "grad_norm": 0.04865378306889885, "learning_rate": 2.67192795503434e-05, "loss": 0.8369, "step": 220630 }, { "epoch": 3.8736635123509893, "grad_norm": 0.052306257010666324, "learning_rate": 2.671191312357138e-05, "loss": 0.8357, "step": 220640 }, { "epoch": 3.8738390772309907, "grad_norm": 0.05326203132754801, "learning_rate": 2.67045476380437e-05, "loss": 0.8434, "step": 220650 }, { "epoch": 3.8740146421109922, "grad_norm": 0.04734024574186834, "learning_rate": 2.6697183093853584e-05, "loss": 0.8525, "step": 220660 }, { "epoch": 3.8741902069909937, "grad_norm": 0.059027279408854806, "learning_rate": 2.668981949109444e-05, "loss": 0.8464, "step": 220670 }, { "epoch": 3.8743657718709947, "grad_norm": 0.04677270543880885, "learning_rate": 2.668245682985951e-05, "loss": 0.8445, "step": 220680 }, { "epoch": 3.874541336750996, "grad_norm": 0.04985757364837736, "learning_rate": 2.6675095110242125e-05, "loss": 0.8344, "step": 220690 }, { "epoch": 3.8747169016309977, "grad_norm": 0.05519134261390024, "learning_rate": 2.6667734332335493e-05, "loss": 0.8427, "step": 220700 }, { "epoch": 3.874892466510999, "grad_norm": 0.05499054073448064, "learning_rate": 2.6660374496232916e-05, "loss": 0.8475, "step": 220710 }, { "epoch": 3.8750680313910006, "grad_norm": 0.04592358681721682, "learning_rate": 2.665301560202767e-05, "loss": 0.8294, "step": 220720 }, { "epoch": 3.8752435962710017, "grad_norm": 0.05098805163174184, "learning_rate": 2.6645657649812934e-05, "loss": 0.8386, "step": 220730 }, { "epoch": 3.8754191611510036, "grad_norm": 0.06009740109607623, "learning_rate": 2.6638300639681964e-05, "loss": 0.8402, "step": 220740 }, { "epoch": 3.8755947260310046, "grad_norm": 0.0473954147535542, "learning_rate": 2.6630944571728e-05, "loss": 0.8407, "step": 220750 }, { "epoch": 3.875770290911006, "grad_norm": 0.04444707969126709, "learning_rate": 2.6623589446044206e-05, "loss": 0.8383, "step": 220760 }, { "epoch": 3.8759458557910076, "grad_norm": 0.04205555989837212, "learning_rate": 2.6616235262723776e-05, "loss": 0.8361, "step": 220770 }, { "epoch": 3.876121420671009, "grad_norm": 0.06029900567834572, "learning_rate": 2.660888202185991e-05, "loss": 0.8456, "step": 220780 }, { "epoch": 3.8762969855510105, "grad_norm": 0.04441306795619205, "learning_rate": 2.6601529723545788e-05, "loss": 0.8334, "step": 220790 }, { "epoch": 3.8764725504310116, "grad_norm": 0.04723494867467694, "learning_rate": 2.6594178367874517e-05, "loss": 0.8376, "step": 220800 }, { "epoch": 3.876648115311013, "grad_norm": 0.05192946058457212, "learning_rate": 2.658682795493927e-05, "loss": 0.8341, "step": 220810 }, { "epoch": 3.8768236801910145, "grad_norm": 0.06264820941800259, "learning_rate": 2.6579478484833194e-05, "loss": 0.843, "step": 220820 }, { "epoch": 3.876999245071016, "grad_norm": 0.06964618847190272, "learning_rate": 2.657212995764934e-05, "loss": 0.8437, "step": 220830 }, { "epoch": 3.8771748099510175, "grad_norm": 0.04860131945867386, "learning_rate": 2.6564782373480907e-05, "loss": 0.8356, "step": 220840 }, { "epoch": 3.877350374831019, "grad_norm": 0.0926491811231367, "learning_rate": 2.6557435732420936e-05, "loss": 0.8399, "step": 220850 }, { "epoch": 3.8775259397110204, "grad_norm": 0.04546755759971942, "learning_rate": 2.6550090034562537e-05, "loss": 0.8497, "step": 220860 }, { "epoch": 3.8777015045910215, "grad_norm": 0.06411854006673885, "learning_rate": 2.6542745279998744e-05, "loss": 0.8375, "step": 220870 }, { "epoch": 3.877877069471023, "grad_norm": 0.049630875439706304, "learning_rate": 2.6535401468822636e-05, "loss": 0.8412, "step": 220880 }, { "epoch": 3.8780526343510244, "grad_norm": 0.045109660703627374, "learning_rate": 2.6528058601127288e-05, "loss": 0.8447, "step": 220890 }, { "epoch": 3.878228199231026, "grad_norm": 0.058932022756407736, "learning_rate": 2.6520716677005685e-05, "loss": 0.8339, "step": 220900 }, { "epoch": 3.8784037641110274, "grad_norm": 0.06875773026184422, "learning_rate": 2.6513375696550874e-05, "loss": 0.8395, "step": 220910 }, { "epoch": 3.8785793289910284, "grad_norm": 0.044494615665701426, "learning_rate": 2.6506035659855894e-05, "loss": 0.8442, "step": 220920 }, { "epoch": 3.87875489387103, "grad_norm": 0.053182608161193974, "learning_rate": 2.649869656701369e-05, "loss": 0.834, "step": 220930 }, { "epoch": 3.8789304587510314, "grad_norm": 0.051196599472405284, "learning_rate": 2.649135841811729e-05, "loss": 0.8399, "step": 220940 }, { "epoch": 3.879106023631033, "grad_norm": 0.05402838229673597, "learning_rate": 2.6484021213259647e-05, "loss": 0.8391, "step": 220950 }, { "epoch": 3.8792815885110343, "grad_norm": 0.0481260627499626, "learning_rate": 2.647668495253377e-05, "loss": 0.8418, "step": 220960 }, { "epoch": 3.879457153391036, "grad_norm": 0.061047032594105176, "learning_rate": 2.646934963603255e-05, "loss": 0.8305, "step": 220970 }, { "epoch": 3.8796327182710373, "grad_norm": 0.043041020409989986, "learning_rate": 2.6462015263848943e-05, "loss": 0.8372, "step": 220980 }, { "epoch": 3.8798082831510383, "grad_norm": 0.05964476306250617, "learning_rate": 2.6454681836075916e-05, "loss": 0.8433, "step": 220990 }, { "epoch": 3.87998384803104, "grad_norm": 0.04755558741769873, "learning_rate": 2.6447349352806316e-05, "loss": 0.8411, "step": 221000 }, { "epoch": 3.8801594129110413, "grad_norm": 0.05637797372418505, "learning_rate": 2.6440017814133128e-05, "loss": 0.8454, "step": 221010 }, { "epoch": 3.8803349777910427, "grad_norm": 0.07004730159323973, "learning_rate": 2.643268722014919e-05, "loss": 0.8332, "step": 221020 }, { "epoch": 3.8805105426710442, "grad_norm": 0.06671428449294227, "learning_rate": 2.6425357570947412e-05, "loss": 0.8439, "step": 221030 }, { "epoch": 3.8806861075510453, "grad_norm": 0.04745360962211555, "learning_rate": 2.641802886662063e-05, "loss": 0.8388, "step": 221040 }, { "epoch": 3.8808616724310467, "grad_norm": 0.06372545258433138, "learning_rate": 2.6410701107261697e-05, "loss": 0.8366, "step": 221050 }, { "epoch": 3.881037237311048, "grad_norm": 0.04380193423821364, "learning_rate": 2.6403374292963524e-05, "loss": 0.854, "step": 221060 }, { "epoch": 3.8812128021910497, "grad_norm": 0.059633111697103225, "learning_rate": 2.6396048423818855e-05, "loss": 0.8312, "step": 221070 }, { "epoch": 3.881388367071051, "grad_norm": 0.05596707411411138, "learning_rate": 2.6388723499920553e-05, "loss": 0.8362, "step": 221080 }, { "epoch": 3.8815639319510526, "grad_norm": 0.05141994283552374, "learning_rate": 2.638139952136145e-05, "loss": 0.8346, "step": 221090 }, { "epoch": 3.881739496831054, "grad_norm": 0.044690151323756394, "learning_rate": 2.6374076488234283e-05, "loss": 0.8478, "step": 221100 }, { "epoch": 3.881915061711055, "grad_norm": 0.05264210470411027, "learning_rate": 2.6366754400631887e-05, "loss": 0.8343, "step": 221110 }, { "epoch": 3.8820906265910566, "grad_norm": 0.04951642373798584, "learning_rate": 2.6359433258647e-05, "loss": 0.8371, "step": 221120 }, { "epoch": 3.882266191471058, "grad_norm": 0.05714658635520264, "learning_rate": 2.6352113062372437e-05, "loss": 0.8377, "step": 221130 }, { "epoch": 3.8824417563510596, "grad_norm": 0.04611798068470271, "learning_rate": 2.634479381190087e-05, "loss": 0.8461, "step": 221140 }, { "epoch": 3.882617321231061, "grad_norm": 0.05585648113663818, "learning_rate": 2.6337475507325073e-05, "loss": 0.843, "step": 221150 }, { "epoch": 3.882792886111062, "grad_norm": 0.04890887846388081, "learning_rate": 2.6330158148737806e-05, "loss": 0.8414, "step": 221160 }, { "epoch": 3.882968450991064, "grad_norm": 0.050848528885300844, "learning_rate": 2.6322841736231683e-05, "loss": 0.8377, "step": 221170 }, { "epoch": 3.883144015871065, "grad_norm": 0.04857309997621667, "learning_rate": 2.631552626989952e-05, "loss": 0.8399, "step": 221180 }, { "epoch": 3.8833195807510665, "grad_norm": 0.05051736294807755, "learning_rate": 2.6308211749833935e-05, "loss": 0.8462, "step": 221190 }, { "epoch": 3.883495145631068, "grad_norm": 0.048538715411217896, "learning_rate": 2.630089817612765e-05, "loss": 0.8289, "step": 221200 }, { "epoch": 3.8836707105110695, "grad_norm": 0.04993145268228837, "learning_rate": 2.6293585548873268e-05, "loss": 0.838, "step": 221210 }, { "epoch": 3.883846275391071, "grad_norm": 0.05100416921850043, "learning_rate": 2.628627386816347e-05, "loss": 0.8421, "step": 221220 }, { "epoch": 3.884021840271072, "grad_norm": 0.067147655005026, "learning_rate": 2.627896313409094e-05, "loss": 0.8395, "step": 221230 }, { "epoch": 3.8841974051510735, "grad_norm": 0.05460481134926325, "learning_rate": 2.627165334674823e-05, "loss": 0.841, "step": 221240 }, { "epoch": 3.884372970031075, "grad_norm": 0.0880934572400214, "learning_rate": 2.6264344506227993e-05, "loss": 0.8454, "step": 221250 }, { "epoch": 3.8845485349110764, "grad_norm": 0.045851070784151106, "learning_rate": 2.6257036612622865e-05, "loss": 0.8455, "step": 221260 }, { "epoch": 3.884724099791078, "grad_norm": 0.05666348989771427, "learning_rate": 2.624972966602538e-05, "loss": 0.8382, "step": 221270 }, { "epoch": 3.884899664671079, "grad_norm": 0.05696527513668593, "learning_rate": 2.624242366652815e-05, "loss": 0.8364, "step": 221280 }, { "epoch": 3.885075229551081, "grad_norm": 0.06191582764334084, "learning_rate": 2.623511861422373e-05, "loss": 0.8373, "step": 221290 }, { "epoch": 3.885250794431082, "grad_norm": 0.05954732362202053, "learning_rate": 2.6227814509204717e-05, "loss": 0.8392, "step": 221300 }, { "epoch": 3.8854263593110834, "grad_norm": 0.05405977265516696, "learning_rate": 2.6220511351563604e-05, "loss": 0.8409, "step": 221310 }, { "epoch": 3.885601924191085, "grad_norm": 0.04516498340521064, "learning_rate": 2.6213209141392936e-05, "loss": 0.8391, "step": 221320 }, { "epoch": 3.8857774890710863, "grad_norm": 0.04938668422737259, "learning_rate": 2.6205907878785273e-05, "loss": 0.8428, "step": 221330 }, { "epoch": 3.885953053951088, "grad_norm": 0.05082432603714264, "learning_rate": 2.6198607563833034e-05, "loss": 0.8345, "step": 221340 }, { "epoch": 3.886128618831089, "grad_norm": 0.05095689770716723, "learning_rate": 2.6191308196628822e-05, "loss": 0.8443, "step": 221350 }, { "epoch": 3.8863041837110903, "grad_norm": 0.061100295854960476, "learning_rate": 2.618400977726505e-05, "loss": 0.8338, "step": 221360 }, { "epoch": 3.886479748591092, "grad_norm": 0.0630951503872218, "learning_rate": 2.6176712305834245e-05, "loss": 0.8395, "step": 221370 }, { "epoch": 3.8866553134710933, "grad_norm": 0.058124228747362235, "learning_rate": 2.61694157824288e-05, "loss": 0.8415, "step": 221380 }, { "epoch": 3.8868308783510948, "grad_norm": 0.06765894808491713, "learning_rate": 2.6162120207141205e-05, "loss": 0.8449, "step": 221390 }, { "epoch": 3.887006443231096, "grad_norm": 0.05702866046239002, "learning_rate": 2.6154825580063918e-05, "loss": 0.8373, "step": 221400 }, { "epoch": 3.8871820081110977, "grad_norm": 0.12672473007735027, "learning_rate": 2.6147531901289302e-05, "loss": 0.8436, "step": 221410 }, { "epoch": 3.8873575729910987, "grad_norm": 0.048531262490116134, "learning_rate": 2.6140239170909796e-05, "loss": 0.8376, "step": 221420 }, { "epoch": 3.8875331378711, "grad_norm": 0.0588763928253645, "learning_rate": 2.6132947389017842e-05, "loss": 0.8384, "step": 221430 }, { "epoch": 3.8877087027511017, "grad_norm": 0.04918391462975673, "learning_rate": 2.6125656555705763e-05, "loss": 0.842, "step": 221440 }, { "epoch": 3.887884267631103, "grad_norm": 0.06437199137155032, "learning_rate": 2.611836667106597e-05, "loss": 0.8457, "step": 221450 }, { "epoch": 3.8880598325111047, "grad_norm": 0.04837937037341058, "learning_rate": 2.6111077735190814e-05, "loss": 0.8403, "step": 221460 }, { "epoch": 3.8882353973911057, "grad_norm": 0.056199380030482145, "learning_rate": 2.6103789748172695e-05, "loss": 0.8324, "step": 221470 }, { "epoch": 3.888410962271107, "grad_norm": 0.06764731318739224, "learning_rate": 2.609650271010387e-05, "loss": 0.8376, "step": 221480 }, { "epoch": 3.8885865271511086, "grad_norm": 0.06471251520020949, "learning_rate": 2.608921662107672e-05, "loss": 0.843, "step": 221490 }, { "epoch": 3.88876209203111, "grad_norm": 0.042444448896490954, "learning_rate": 2.608193148118358e-05, "loss": 0.8382, "step": 221500 }, { "epoch": 3.8889376569111116, "grad_norm": 0.05601594057171357, "learning_rate": 2.6074647290516673e-05, "loss": 0.8416, "step": 221510 }, { "epoch": 3.8891132217911126, "grad_norm": 0.06816686113507768, "learning_rate": 2.6067364049168398e-05, "loss": 0.84, "step": 221520 }, { "epoch": 3.8892887866711146, "grad_norm": 0.04383837233685804, "learning_rate": 2.6060081757230954e-05, "loss": 0.8311, "step": 221530 }, { "epoch": 3.8894643515511156, "grad_norm": 0.0450656091452457, "learning_rate": 2.605280041479665e-05, "loss": 0.8328, "step": 221540 }, { "epoch": 3.889639916431117, "grad_norm": 0.0571600267638123, "learning_rate": 2.6045520021957724e-05, "loss": 0.8281, "step": 221550 }, { "epoch": 3.8898154813111185, "grad_norm": 0.05065484279452021, "learning_rate": 2.6038240578806412e-05, "loss": 0.8422, "step": 221560 }, { "epoch": 3.88999104619112, "grad_norm": 0.04442978672185212, "learning_rate": 2.6030962085434983e-05, "loss": 0.8306, "step": 221570 }, { "epoch": 3.8901666110711215, "grad_norm": 0.05425915897445851, "learning_rate": 2.6023684541935612e-05, "loss": 0.8331, "step": 221580 }, { "epoch": 3.8903421759511225, "grad_norm": 0.08008081813327972, "learning_rate": 2.6016407948400524e-05, "loss": 0.8296, "step": 221590 }, { "epoch": 3.890517740831124, "grad_norm": 0.05158280917298065, "learning_rate": 2.600913230492194e-05, "loss": 0.8337, "step": 221600 }, { "epoch": 3.8906933057111255, "grad_norm": 0.04323987843528949, "learning_rate": 2.6001857611592e-05, "loss": 0.8402, "step": 221610 }, { "epoch": 3.890868870591127, "grad_norm": 0.05789280742983715, "learning_rate": 2.599458386850289e-05, "loss": 0.8432, "step": 221620 }, { "epoch": 3.8910444354711284, "grad_norm": 0.05888907422715911, "learning_rate": 2.598731107574679e-05, "loss": 0.837, "step": 221630 }, { "epoch": 3.89122000035113, "grad_norm": 0.08355688057742996, "learning_rate": 2.5980039233415848e-05, "loss": 0.8407, "step": 221640 }, { "epoch": 3.8913955652311314, "grad_norm": 0.05752788501927827, "learning_rate": 2.5972768341602176e-05, "loss": 0.8408, "step": 221650 }, { "epoch": 3.8915711301111324, "grad_norm": 0.08106848103639307, "learning_rate": 2.5965498400397897e-05, "loss": 0.8398, "step": 221660 }, { "epoch": 3.891746694991134, "grad_norm": 0.0463938259909158, "learning_rate": 2.595822940989516e-05, "loss": 0.8371, "step": 221670 }, { "epoch": 3.8919222598711354, "grad_norm": 0.054462707500172354, "learning_rate": 2.5950961370185984e-05, "loss": 0.8396, "step": 221680 }, { "epoch": 3.892097824751137, "grad_norm": 0.07357944811362693, "learning_rate": 2.5943694281362574e-05, "loss": 0.8431, "step": 221690 }, { "epoch": 3.8922733896311383, "grad_norm": 0.04883245922076961, "learning_rate": 2.5936428143516907e-05, "loss": 0.8365, "step": 221700 }, { "epoch": 3.8924489545111394, "grad_norm": 0.05678096960201468, "learning_rate": 2.59291629567411e-05, "loss": 0.8431, "step": 221710 }, { "epoch": 3.892624519391141, "grad_norm": 0.05769700335055701, "learning_rate": 2.5921898721127176e-05, "loss": 0.8377, "step": 221720 }, { "epoch": 3.8928000842711423, "grad_norm": 0.05388362650624751, "learning_rate": 2.5914635436767176e-05, "loss": 0.8464, "step": 221730 }, { "epoch": 3.892975649151144, "grad_norm": 0.04721682633638237, "learning_rate": 2.5907373103753162e-05, "loss": 0.8365, "step": 221740 }, { "epoch": 3.8931512140311453, "grad_norm": 0.05714723778255929, "learning_rate": 2.5900111722177095e-05, "loss": 0.841, "step": 221750 }, { "epoch": 3.8933267789111468, "grad_norm": 0.0496341538276018, "learning_rate": 2.5892851292130998e-05, "loss": 0.838, "step": 221760 }, { "epoch": 3.8935023437911482, "grad_norm": 0.05330902098541864, "learning_rate": 2.588559181370691e-05, "loss": 0.8342, "step": 221770 }, { "epoch": 3.8936779086711493, "grad_norm": 0.061977449480415066, "learning_rate": 2.587833328699673e-05, "loss": 0.8359, "step": 221780 }, { "epoch": 3.8938534735511507, "grad_norm": 0.06436290876382533, "learning_rate": 2.5871075712092473e-05, "loss": 0.8385, "step": 221790 }, { "epoch": 3.8940290384311522, "grad_norm": 0.05687975192158918, "learning_rate": 2.5863819089086077e-05, "loss": 0.8437, "step": 221800 }, { "epoch": 3.8942046033111537, "grad_norm": 0.053255077985922133, "learning_rate": 2.5856563418069528e-05, "loss": 0.84, "step": 221810 }, { "epoch": 3.894380168191155, "grad_norm": 0.046433818341630954, "learning_rate": 2.5849308699134688e-05, "loss": 0.8388, "step": 221820 }, { "epoch": 3.894555733071156, "grad_norm": 0.05872615443950612, "learning_rate": 2.5842054932373518e-05, "loss": 0.8366, "step": 221830 }, { "epoch": 3.8947312979511577, "grad_norm": 0.06381822892035627, "learning_rate": 2.5834802117877937e-05, "loss": 0.8341, "step": 221840 }, { "epoch": 3.894906862831159, "grad_norm": 0.045807874902925406, "learning_rate": 2.582755025573976e-05, "loss": 0.8443, "step": 221850 }, { "epoch": 3.8950824277111606, "grad_norm": 0.05483373104433313, "learning_rate": 2.5820299346050992e-05, "loss": 0.8407, "step": 221860 }, { "epoch": 3.895257992591162, "grad_norm": 0.050293626507445933, "learning_rate": 2.5813049388903417e-05, "loss": 0.8336, "step": 221870 }, { "epoch": 3.8954335574711636, "grad_norm": 0.05247647076160996, "learning_rate": 2.5805800384388925e-05, "loss": 0.8333, "step": 221880 }, { "epoch": 3.895609122351165, "grad_norm": 0.051838947019950135, "learning_rate": 2.579855233259934e-05, "loss": 0.8427, "step": 221890 }, { "epoch": 3.895784687231166, "grad_norm": 0.056035980815287226, "learning_rate": 2.57913052336265e-05, "loss": 0.8417, "step": 221900 }, { "epoch": 3.8959602521111676, "grad_norm": 0.049111023638129504, "learning_rate": 2.578405908756227e-05, "loss": 0.8372, "step": 221910 }, { "epoch": 3.896135816991169, "grad_norm": 0.04513243291012058, "learning_rate": 2.5776813894498393e-05, "loss": 0.844, "step": 221920 }, { "epoch": 3.8963113818711705, "grad_norm": 0.04688634761483277, "learning_rate": 2.5769569654526705e-05, "loss": 0.8417, "step": 221930 }, { "epoch": 3.896486946751172, "grad_norm": 0.049516552876564784, "learning_rate": 2.576232636773901e-05, "loss": 0.8367, "step": 221940 }, { "epoch": 3.896662511631173, "grad_norm": 0.050403581731815285, "learning_rate": 2.575508403422704e-05, "loss": 0.8436, "step": 221950 }, { "epoch": 3.896838076511175, "grad_norm": 0.05665490476461767, "learning_rate": 2.574784265408257e-05, "loss": 0.8382, "step": 221960 }, { "epoch": 3.897013641391176, "grad_norm": 0.05407461198483254, "learning_rate": 2.574060222739736e-05, "loss": 0.839, "step": 221970 }, { "epoch": 3.8971892062711775, "grad_norm": 0.05317010732949234, "learning_rate": 2.5733362754263167e-05, "loss": 0.8364, "step": 221980 }, { "epoch": 3.897364771151179, "grad_norm": 0.048500308357381006, "learning_rate": 2.5726124234771664e-05, "loss": 0.8475, "step": 221990 }, { "epoch": 3.8975403360311804, "grad_norm": 0.05095050383443403, "learning_rate": 2.5718886669014598e-05, "loss": 0.8428, "step": 222000 }, { "epoch": 3.897715900911182, "grad_norm": 0.05506128428183534, "learning_rate": 2.571165005708369e-05, "loss": 0.8376, "step": 222010 }, { "epoch": 3.897891465791183, "grad_norm": 0.04815780362998678, "learning_rate": 2.5704414399070543e-05, "loss": 0.8293, "step": 222020 }, { "epoch": 3.8980670306711844, "grad_norm": 0.05019755258429041, "learning_rate": 2.5697179695066964e-05, "loss": 0.8395, "step": 222030 }, { "epoch": 3.898242595551186, "grad_norm": 0.06931072562208648, "learning_rate": 2.5689945945164512e-05, "loss": 0.8388, "step": 222040 }, { "epoch": 3.8984181604311874, "grad_norm": 0.06027425320331265, "learning_rate": 2.5682713149454918e-05, "loss": 0.8395, "step": 222050 }, { "epoch": 3.898593725311189, "grad_norm": 0.0433124535990823, "learning_rate": 2.5675481308029735e-05, "loss": 0.8357, "step": 222060 }, { "epoch": 3.89876929019119, "grad_norm": 0.04423496727264816, "learning_rate": 2.566825042098065e-05, "loss": 0.8353, "step": 222070 }, { "epoch": 3.898944855071192, "grad_norm": 0.06260774065816824, "learning_rate": 2.56610204883993e-05, "loss": 0.836, "step": 222080 }, { "epoch": 3.899120419951193, "grad_norm": 0.04151114340919156, "learning_rate": 2.565379151037724e-05, "loss": 0.8324, "step": 222090 }, { "epoch": 3.8992959848311943, "grad_norm": 0.0723105555101005, "learning_rate": 2.564656348700607e-05, "loss": 0.8362, "step": 222100 }, { "epoch": 3.899471549711196, "grad_norm": 0.060377660018217975, "learning_rate": 2.5639336418377408e-05, "loss": 0.8403, "step": 222110 }, { "epoch": 3.8996471145911973, "grad_norm": 0.04742296464886442, "learning_rate": 2.5632110304582767e-05, "loss": 0.8463, "step": 222120 }, { "epoch": 3.8998226794711988, "grad_norm": 0.05569296182646124, "learning_rate": 2.5624885145713745e-05, "loss": 0.8412, "step": 222130 }, { "epoch": 3.8999982443512, "grad_norm": 0.05399414827649697, "learning_rate": 2.561766094186187e-05, "loss": 0.8423, "step": 222140 }, { "epoch": 3.9001738092312013, "grad_norm": 0.04555694879220789, "learning_rate": 2.5610437693118695e-05, "loss": 0.828, "step": 222150 }, { "epoch": 3.9003493741112027, "grad_norm": 0.0508733229572191, "learning_rate": 2.5603215399575697e-05, "loss": 0.8319, "step": 222160 }, { "epoch": 3.9005249389912042, "grad_norm": 0.05312127686073041, "learning_rate": 2.559599406132442e-05, "loss": 0.8407, "step": 222170 }, { "epoch": 3.9007005038712057, "grad_norm": 0.0504648945768219, "learning_rate": 2.558877367845637e-05, "loss": 0.8284, "step": 222180 }, { "epoch": 3.9008760687512067, "grad_norm": 0.05516512839154054, "learning_rate": 2.558155425106296e-05, "loss": 0.8452, "step": 222190 }, { "epoch": 3.9010516336312087, "grad_norm": 0.052836307774292275, "learning_rate": 2.5574335779235765e-05, "loss": 0.8409, "step": 222200 }, { "epoch": 3.9012271985112097, "grad_norm": 0.06100688053162658, "learning_rate": 2.5567118263066154e-05, "loss": 0.8473, "step": 222210 }, { "epoch": 3.901402763391211, "grad_norm": 0.06505172320941134, "learning_rate": 2.555990170264565e-05, "loss": 0.8424, "step": 222220 }, { "epoch": 3.9015783282712126, "grad_norm": 0.060319490356756524, "learning_rate": 2.555268609806562e-05, "loss": 0.8381, "step": 222230 }, { "epoch": 3.901753893151214, "grad_norm": 0.053201003892375844, "learning_rate": 2.5545471449417513e-05, "loss": 0.8426, "step": 222240 }, { "epoch": 3.9019294580312156, "grad_norm": 0.04524932792105024, "learning_rate": 2.5538257756792772e-05, "loss": 0.8461, "step": 222250 }, { "epoch": 3.9021050229112166, "grad_norm": 0.05949112376619402, "learning_rate": 2.5531045020282746e-05, "loss": 0.8432, "step": 222260 }, { "epoch": 3.902280587791218, "grad_norm": 0.05861888546190387, "learning_rate": 2.5523833239978828e-05, "loss": 0.8323, "step": 222270 }, { "epoch": 3.9024561526712196, "grad_norm": 0.04570210707248754, "learning_rate": 2.5516622415972452e-05, "loss": 0.8404, "step": 222280 }, { "epoch": 3.902631717551221, "grad_norm": 0.05172482949654708, "learning_rate": 2.5509412548354906e-05, "loss": 0.836, "step": 222290 }, { "epoch": 3.9028072824312225, "grad_norm": 0.06895212495911308, "learning_rate": 2.550220363721756e-05, "loss": 0.8348, "step": 222300 }, { "epoch": 3.902982847311224, "grad_norm": 0.04647172875207578, "learning_rate": 2.549499568265177e-05, "loss": 0.8365, "step": 222310 }, { "epoch": 3.9031584121912255, "grad_norm": 0.06002378225453959, "learning_rate": 2.5487788684748882e-05, "loss": 0.8374, "step": 222320 }, { "epoch": 3.9033339770712265, "grad_norm": 0.047296776128430765, "learning_rate": 2.5480582643600155e-05, "loss": 0.8452, "step": 222330 }, { "epoch": 3.903509541951228, "grad_norm": 0.04788747362780445, "learning_rate": 2.5473377559296925e-05, "loss": 0.8467, "step": 222340 }, { "epoch": 3.9036851068312295, "grad_norm": 0.07831503650157705, "learning_rate": 2.5466173431930502e-05, "loss": 0.8412, "step": 222350 }, { "epoch": 3.903860671711231, "grad_norm": 0.06050165277749668, "learning_rate": 2.5458970261592074e-05, "loss": 0.8366, "step": 222360 }, { "epoch": 3.9040362365912324, "grad_norm": 0.039923632348503615, "learning_rate": 2.545176804837303e-05, "loss": 0.8393, "step": 222370 }, { "epoch": 3.9042118014712335, "grad_norm": 0.048646008894863674, "learning_rate": 2.5444566792364533e-05, "loss": 0.8467, "step": 222380 }, { "epoch": 3.904387366351235, "grad_norm": 0.05671811778747898, "learning_rate": 2.5437366493657888e-05, "loss": 0.8407, "step": 222390 }, { "epoch": 3.9045629312312364, "grad_norm": 0.045406767968990236, "learning_rate": 2.5430167152344265e-05, "loss": 0.8359, "step": 222400 }, { "epoch": 3.904738496111238, "grad_norm": 0.052824139107193115, "learning_rate": 2.54229687685149e-05, "loss": 0.8364, "step": 222410 }, { "epoch": 3.9049140609912394, "grad_norm": 0.052288614124564814, "learning_rate": 2.5415771342261042e-05, "loss": 0.8363, "step": 222420 }, { "epoch": 3.905089625871241, "grad_norm": 0.06532166439578808, "learning_rate": 2.540857487367381e-05, "loss": 0.8438, "step": 222430 }, { "epoch": 3.9052651907512423, "grad_norm": 0.059699475300794745, "learning_rate": 2.5401379362844428e-05, "loss": 0.842, "step": 222440 }, { "epoch": 3.9054407556312434, "grad_norm": 0.05570751724812584, "learning_rate": 2.539418480986408e-05, "loss": 0.8426, "step": 222450 }, { "epoch": 3.905616320511245, "grad_norm": 0.051137068739079465, "learning_rate": 2.5386991214823874e-05, "loss": 0.8412, "step": 222460 }, { "epoch": 3.9057918853912463, "grad_norm": 0.04394206565820816, "learning_rate": 2.537979857781498e-05, "loss": 0.8466, "step": 222470 }, { "epoch": 3.905967450271248, "grad_norm": 0.0947206603928735, "learning_rate": 2.537260689892853e-05, "loss": 0.8416, "step": 222480 }, { "epoch": 3.9061430151512493, "grad_norm": 0.03911333069736279, "learning_rate": 2.5365416178255675e-05, "loss": 0.8398, "step": 222490 }, { "epoch": 3.9063185800312503, "grad_norm": 0.048972361473956005, "learning_rate": 2.535822641588747e-05, "loss": 0.8361, "step": 222500 }, { "epoch": 3.906494144911252, "grad_norm": 0.06554427823946492, "learning_rate": 2.535103761191502e-05, "loss": 0.8405, "step": 222510 }, { "epoch": 3.9066697097912533, "grad_norm": 0.05195906888506268, "learning_rate": 2.5343849766429455e-05, "loss": 0.8381, "step": 222520 }, { "epoch": 3.9068452746712548, "grad_norm": 0.06238015094059804, "learning_rate": 2.5336662879521757e-05, "loss": 0.8383, "step": 222530 }, { "epoch": 3.9070208395512562, "grad_norm": 0.04952976044438362, "learning_rate": 2.53294769512831e-05, "loss": 0.8314, "step": 222540 }, { "epoch": 3.9071964044312577, "grad_norm": 0.04559724506494129, "learning_rate": 2.5322291981804443e-05, "loss": 0.8413, "step": 222550 }, { "epoch": 3.907371969311259, "grad_norm": 0.052456440618128715, "learning_rate": 2.5315107971176878e-05, "loss": 0.8428, "step": 222560 }, { "epoch": 3.90754753419126, "grad_norm": 0.04568018729326008, "learning_rate": 2.5307924919491372e-05, "loss": 0.8341, "step": 222570 }, { "epoch": 3.9077230990712617, "grad_norm": 0.05644382600402056, "learning_rate": 2.5300742826838958e-05, "loss": 0.8417, "step": 222580 }, { "epoch": 3.907898663951263, "grad_norm": 0.04794315833333682, "learning_rate": 2.5293561693310675e-05, "loss": 0.8426, "step": 222590 }, { "epoch": 3.9080742288312647, "grad_norm": 0.057916646751041675, "learning_rate": 2.5286381518997443e-05, "loss": 0.8392, "step": 222600 }, { "epoch": 3.908249793711266, "grad_norm": 0.05250066924167504, "learning_rate": 2.5279202303990274e-05, "loss": 0.8342, "step": 222610 }, { "epoch": 3.908425358591267, "grad_norm": 0.053054903836349705, "learning_rate": 2.5272024048380137e-05, "loss": 0.8372, "step": 222620 }, { "epoch": 3.908600923471269, "grad_norm": 0.05210758544404214, "learning_rate": 2.5264846752257955e-05, "loss": 0.8456, "step": 222630 }, { "epoch": 3.90877648835127, "grad_norm": 0.04049142517473293, "learning_rate": 2.525767041571467e-05, "loss": 0.836, "step": 222640 }, { "epoch": 3.9089520532312716, "grad_norm": 0.04610985032062517, "learning_rate": 2.5250495038841232e-05, "loss": 0.8438, "step": 222650 }, { "epoch": 3.909127618111273, "grad_norm": 0.05750822127642141, "learning_rate": 2.5243320621728557e-05, "loss": 0.8414, "step": 222660 }, { "epoch": 3.9093031829912746, "grad_norm": 0.07489920738508794, "learning_rate": 2.5236147164467495e-05, "loss": 0.8352, "step": 222670 }, { "epoch": 3.909478747871276, "grad_norm": 0.04973911310201513, "learning_rate": 2.5228974667148968e-05, "loss": 0.8442, "step": 222680 }, { "epoch": 3.909654312751277, "grad_norm": 0.05761714179913874, "learning_rate": 2.5221803129863884e-05, "loss": 0.8382, "step": 222690 }, { "epoch": 3.9098298776312785, "grad_norm": 0.05798019315246162, "learning_rate": 2.5214632552703027e-05, "loss": 0.843, "step": 222700 }, { "epoch": 3.91000544251128, "grad_norm": 0.05790663208551657, "learning_rate": 2.520746293575736e-05, "loss": 0.8355, "step": 222710 }, { "epoch": 3.9101810073912815, "grad_norm": 0.051900359291227535, "learning_rate": 2.520029427911762e-05, "loss": 0.8431, "step": 222720 }, { "epoch": 3.910356572271283, "grad_norm": 0.0532603819385904, "learning_rate": 2.5193126582874717e-05, "loss": 0.8369, "step": 222730 }, { "epoch": 3.910532137151284, "grad_norm": 0.04891053679878505, "learning_rate": 2.51859598471194e-05, "loss": 0.8432, "step": 222740 }, { "epoch": 3.910707702031286, "grad_norm": 0.05635723215794897, "learning_rate": 2.5178794071942497e-05, "loss": 0.8345, "step": 222750 }, { "epoch": 3.910883266911287, "grad_norm": 0.04745852694065883, "learning_rate": 2.517162925743483e-05, "loss": 0.8415, "step": 222760 }, { "epoch": 3.9110588317912884, "grad_norm": 0.049571360046278226, "learning_rate": 2.5164465403687138e-05, "loss": 0.8445, "step": 222770 }, { "epoch": 3.91123439667129, "grad_norm": 0.05358081082111596, "learning_rate": 2.5157302510790187e-05, "loss": 0.8354, "step": 222780 }, { "epoch": 3.9114099615512914, "grad_norm": 0.04480936441606455, "learning_rate": 2.5150140578834785e-05, "loss": 0.843, "step": 222790 }, { "epoch": 3.911585526431293, "grad_norm": 0.05135699245890941, "learning_rate": 2.5142979607911605e-05, "loss": 0.8306, "step": 222800 }, { "epoch": 3.911761091311294, "grad_norm": 0.04784014493148808, "learning_rate": 2.513581959811142e-05, "loss": 0.8379, "step": 222810 }, { "epoch": 3.9119366561912954, "grad_norm": 0.047636982311832506, "learning_rate": 2.5128660549524936e-05, "loss": 0.8376, "step": 222820 }, { "epoch": 3.912112221071297, "grad_norm": 0.07585041254467774, "learning_rate": 2.5121502462242897e-05, "loss": 0.8407, "step": 222830 }, { "epoch": 3.9122877859512983, "grad_norm": 0.056758557372683524, "learning_rate": 2.511434533635592e-05, "loss": 0.8361, "step": 222840 }, { "epoch": 3.9124633508313, "grad_norm": 0.052679176917707515, "learning_rate": 2.5107189171954748e-05, "loss": 0.8347, "step": 222850 }, { "epoch": 3.912638915711301, "grad_norm": 0.05681462775667279, "learning_rate": 2.5100033969130054e-05, "loss": 0.8326, "step": 222860 }, { "epoch": 3.9128144805913028, "grad_norm": 0.05252644959309877, "learning_rate": 2.509287972797241e-05, "loss": 0.8353, "step": 222870 }, { "epoch": 3.912990045471304, "grad_norm": 0.065855881824067, "learning_rate": 2.50857264485726e-05, "loss": 0.8398, "step": 222880 }, { "epoch": 3.9131656103513053, "grad_norm": 0.0682574561875848, "learning_rate": 2.507857413102115e-05, "loss": 0.8449, "step": 222890 }, { "epoch": 3.9133411752313068, "grad_norm": 0.049085464328467214, "learning_rate": 2.507142277540874e-05, "loss": 0.8372, "step": 222900 }, { "epoch": 3.9135167401113082, "grad_norm": 0.057471239967051814, "learning_rate": 2.506427238182594e-05, "loss": 0.8436, "step": 222910 }, { "epoch": 3.9136923049913097, "grad_norm": 0.04952618242859789, "learning_rate": 2.5057122950363355e-05, "loss": 0.8421, "step": 222920 }, { "epoch": 3.9138678698713107, "grad_norm": 0.08035761768240005, "learning_rate": 2.5049974481111595e-05, "loss": 0.8312, "step": 222930 }, { "epoch": 3.9140434347513122, "grad_norm": 0.0487278079069478, "learning_rate": 2.50428269741612e-05, "loss": 0.8399, "step": 222940 }, { "epoch": 3.9142189996313137, "grad_norm": 0.0816181795021592, "learning_rate": 2.5035680429602738e-05, "loss": 0.8428, "step": 222950 }, { "epoch": 3.914394564511315, "grad_norm": 0.06680148582254974, "learning_rate": 2.502853484752679e-05, "loss": 0.8351, "step": 222960 }, { "epoch": 3.9145701293913167, "grad_norm": 0.04608058533976283, "learning_rate": 2.5021390228023838e-05, "loss": 0.8423, "step": 222970 }, { "epoch": 3.9147456942713177, "grad_norm": 0.048243771479153416, "learning_rate": 2.501424657118444e-05, "loss": 0.8408, "step": 222980 }, { "epoch": 3.9149212591513196, "grad_norm": 0.04561159599788216, "learning_rate": 2.500710387709909e-05, "loss": 0.8356, "step": 222990 }, { "epoch": 3.9150968240313206, "grad_norm": 0.05067786512171242, "learning_rate": 2.4999962145858332e-05, "loss": 0.8415, "step": 223000 }, { "epoch": 3.915272388911322, "grad_norm": 0.04792632939687021, "learning_rate": 2.499282137755259e-05, "loss": 0.8353, "step": 223010 }, { "epoch": 3.9154479537913236, "grad_norm": 0.04708025888635383, "learning_rate": 2.498568157227237e-05, "loss": 0.835, "step": 223020 }, { "epoch": 3.915623518671325, "grad_norm": 0.05512401020216134, "learning_rate": 2.4978542730108153e-05, "loss": 0.8306, "step": 223030 }, { "epoch": 3.9157990835513266, "grad_norm": 0.053294569416631084, "learning_rate": 2.4971404851150327e-05, "loss": 0.8462, "step": 223040 }, { "epoch": 3.9159746484313276, "grad_norm": 0.04964393280292346, "learning_rate": 2.496426793548942e-05, "loss": 0.8362, "step": 223050 }, { "epoch": 3.916150213311329, "grad_norm": 0.053129865908775685, "learning_rate": 2.4957131983215777e-05, "loss": 0.8336, "step": 223060 }, { "epoch": 3.9163257781913305, "grad_norm": 0.055949936585148186, "learning_rate": 2.494999699441987e-05, "loss": 0.8378, "step": 223070 }, { "epoch": 3.916501343071332, "grad_norm": 0.05023851661608079, "learning_rate": 2.4942862969192063e-05, "loss": 0.8419, "step": 223080 }, { "epoch": 3.9166769079513335, "grad_norm": 0.059835608739477325, "learning_rate": 2.4935729907622754e-05, "loss": 0.8387, "step": 223090 }, { "epoch": 3.916852472831335, "grad_norm": 0.05763509156512283, "learning_rate": 2.4928597809802355e-05, "loss": 0.8316, "step": 223100 }, { "epoch": 3.9170280377113365, "grad_norm": 0.05022630400949895, "learning_rate": 2.492146667582116e-05, "loss": 0.8401, "step": 223110 }, { "epoch": 3.9172036025913375, "grad_norm": 0.052385232723080065, "learning_rate": 2.491433650576957e-05, "loss": 0.8394, "step": 223120 }, { "epoch": 3.917379167471339, "grad_norm": 0.05002254262703842, "learning_rate": 2.490720729973795e-05, "loss": 0.843, "step": 223130 }, { "epoch": 3.9175547323513404, "grad_norm": 0.048839952583107346, "learning_rate": 2.4900079057816563e-05, "loss": 0.8394, "step": 223140 }, { "epoch": 3.917730297231342, "grad_norm": 0.04908643989855063, "learning_rate": 2.4892951780095765e-05, "loss": 0.8503, "step": 223150 }, { "epoch": 3.9179058621113434, "grad_norm": 0.06714767961869435, "learning_rate": 2.4885825466665853e-05, "loss": 0.8296, "step": 223160 }, { "epoch": 3.9180814269913444, "grad_norm": 0.05235203221316881, "learning_rate": 2.4878700117617145e-05, "loss": 0.8416, "step": 223170 }, { "epoch": 3.918256991871346, "grad_norm": 0.05655042391345255, "learning_rate": 2.4871575733039862e-05, "loss": 0.837, "step": 223180 }, { "epoch": 3.9184325567513474, "grad_norm": 0.050462290620073715, "learning_rate": 2.4864452313024316e-05, "loss": 0.8342, "step": 223190 }, { "epoch": 3.918608121631349, "grad_norm": 0.04637210504593296, "learning_rate": 2.4857329857660774e-05, "loss": 0.8352, "step": 223200 }, { "epoch": 3.9187836865113503, "grad_norm": 0.064379678635108, "learning_rate": 2.4850208367039396e-05, "loss": 0.8404, "step": 223210 }, { "epoch": 3.918959251391352, "grad_norm": 0.0465860113420379, "learning_rate": 2.484308784125052e-05, "loss": 0.8427, "step": 223220 }, { "epoch": 3.9191348162713533, "grad_norm": 0.06271684623781769, "learning_rate": 2.4835968280384307e-05, "loss": 0.8398, "step": 223230 }, { "epoch": 3.9193103811513543, "grad_norm": 0.05160943889064425, "learning_rate": 2.4828849684530987e-05, "loss": 0.8366, "step": 223240 }, { "epoch": 3.919485946031356, "grad_norm": 0.03924001714865068, "learning_rate": 2.4821732053780714e-05, "loss": 0.8295, "step": 223250 }, { "epoch": 3.9196615109113573, "grad_norm": 0.041132229107978986, "learning_rate": 2.4814615388223696e-05, "loss": 0.8305, "step": 223260 }, { "epoch": 3.9198370757913588, "grad_norm": 0.05727879564856762, "learning_rate": 2.480749968795014e-05, "loss": 0.8462, "step": 223270 }, { "epoch": 3.9200126406713602, "grad_norm": 0.047632396495137175, "learning_rate": 2.4800384953050125e-05, "loss": 0.8394, "step": 223280 }, { "epoch": 3.9201882055513613, "grad_norm": 0.05324215493316091, "learning_rate": 2.4793271183613842e-05, "loss": 0.8339, "step": 223290 }, { "epoch": 3.9203637704313627, "grad_norm": 0.0510860358835914, "learning_rate": 2.4786158379731445e-05, "loss": 0.8366, "step": 223300 }, { "epoch": 3.9205393353113642, "grad_norm": 0.05893799589145969, "learning_rate": 2.4779046541492995e-05, "loss": 0.838, "step": 223310 }, { "epoch": 3.9207149001913657, "grad_norm": 0.04297664764834778, "learning_rate": 2.4771935668988636e-05, "loss": 0.8407, "step": 223320 }, { "epoch": 3.920890465071367, "grad_norm": 0.17901914086912823, "learning_rate": 2.4764825762308462e-05, "loss": 0.8391, "step": 223330 }, { "epoch": 3.9210660299513687, "grad_norm": 0.04751149715552077, "learning_rate": 2.4757716821542584e-05, "loss": 0.8389, "step": 223340 }, { "epoch": 3.92124159483137, "grad_norm": 0.05359505790382573, "learning_rate": 2.4750608846781018e-05, "loss": 0.8462, "step": 223350 }, { "epoch": 3.921417159711371, "grad_norm": 0.04121727165131206, "learning_rate": 2.4743501838113854e-05, "loss": 0.8408, "step": 223360 }, { "epoch": 3.9215927245913726, "grad_norm": 0.05243557372666204, "learning_rate": 2.473639579563115e-05, "loss": 0.842, "step": 223370 }, { "epoch": 3.921768289471374, "grad_norm": 0.046395350782784894, "learning_rate": 2.472929071942289e-05, "loss": 0.8387, "step": 223380 }, { "epoch": 3.9219438543513756, "grad_norm": 0.05562416432839406, "learning_rate": 2.472218660957918e-05, "loss": 0.8327, "step": 223390 }, { "epoch": 3.922119419231377, "grad_norm": 0.05249161707483656, "learning_rate": 2.4715083466189963e-05, "loss": 0.833, "step": 223400 }, { "epoch": 3.922294984111378, "grad_norm": 0.04924528856922924, "learning_rate": 2.4707981289345275e-05, "loss": 0.8384, "step": 223410 }, { "epoch": 3.92247054899138, "grad_norm": 0.05020663163262387, "learning_rate": 2.470088007913507e-05, "loss": 0.8503, "step": 223420 }, { "epoch": 3.922646113871381, "grad_norm": 0.05526747987591852, "learning_rate": 2.4693779835649322e-05, "loss": 0.8403, "step": 223430 }, { "epoch": 3.9228216787513825, "grad_norm": 0.054855693490470175, "learning_rate": 2.468668055897805e-05, "loss": 0.8386, "step": 223440 }, { "epoch": 3.922997243631384, "grad_norm": 0.11811092747384613, "learning_rate": 2.4679582249211127e-05, "loss": 0.8407, "step": 223450 }, { "epoch": 3.9231728085113855, "grad_norm": 0.03912785788399499, "learning_rate": 2.4672484906438513e-05, "loss": 0.843, "step": 223460 }, { "epoch": 3.923348373391387, "grad_norm": 0.04700597347386273, "learning_rate": 2.4665388530750187e-05, "loss": 0.8439, "step": 223470 }, { "epoch": 3.923523938271388, "grad_norm": 0.04839487041841482, "learning_rate": 2.465829312223599e-05, "loss": 0.8406, "step": 223480 }, { "epoch": 3.9236995031513895, "grad_norm": 0.06062724693202287, "learning_rate": 2.4651198680985846e-05, "loss": 0.8336, "step": 223490 }, { "epoch": 3.923875068031391, "grad_norm": 0.0629905031764298, "learning_rate": 2.4644105207089642e-05, "loss": 0.8386, "step": 223500 }, { "epoch": 3.9240506329113924, "grad_norm": 0.05038689536509044, "learning_rate": 2.463701270063729e-05, "loss": 0.8361, "step": 223510 }, { "epoch": 3.924226197791394, "grad_norm": 0.05271554142286564, "learning_rate": 2.4629921161718593e-05, "loss": 0.8457, "step": 223520 }, { "epoch": 3.924401762671395, "grad_norm": 0.04598280615078472, "learning_rate": 2.4622830590423428e-05, "loss": 0.8328, "step": 223530 }, { "epoch": 3.924577327551397, "grad_norm": 0.06775766193620684, "learning_rate": 2.461574098684167e-05, "loss": 0.8414, "step": 223540 }, { "epoch": 3.924752892431398, "grad_norm": 0.0499704203292704, "learning_rate": 2.4608652351063053e-05, "loss": 0.8386, "step": 223550 }, { "epoch": 3.9249284573113994, "grad_norm": 0.057802878623316706, "learning_rate": 2.4601564683177517e-05, "loss": 0.8375, "step": 223560 }, { "epoch": 3.925104022191401, "grad_norm": 0.05501947480566439, "learning_rate": 2.459447798327476e-05, "loss": 0.8388, "step": 223570 }, { "epoch": 3.9252795870714023, "grad_norm": 0.05105925218013196, "learning_rate": 2.4587392251444647e-05, "loss": 0.8394, "step": 223580 }, { "epoch": 3.925455151951404, "grad_norm": 0.04891881226928075, "learning_rate": 2.4580307487776877e-05, "loss": 0.8393, "step": 223590 }, { "epoch": 3.925630716831405, "grad_norm": 0.04393028668541129, "learning_rate": 2.4573223692361272e-05, "loss": 0.8381, "step": 223600 }, { "epoch": 3.9258062817114063, "grad_norm": 0.0703778171868106, "learning_rate": 2.4566140865287583e-05, "loss": 0.8386, "step": 223610 }, { "epoch": 3.925981846591408, "grad_norm": 0.0542423255277622, "learning_rate": 2.455905900664552e-05, "loss": 0.8367, "step": 223620 }, { "epoch": 3.9261574114714093, "grad_norm": 0.0883874779432791, "learning_rate": 2.455197811652482e-05, "loss": 0.8377, "step": 223630 }, { "epoch": 3.9263329763514108, "grad_norm": 0.05051554286958653, "learning_rate": 2.4544898195015245e-05, "loss": 0.8331, "step": 223640 }, { "epoch": 3.926508541231412, "grad_norm": 0.06461369541864205, "learning_rate": 2.4537819242206438e-05, "loss": 0.8448, "step": 223650 }, { "epoch": 3.9266841061114137, "grad_norm": 0.047449804428909184, "learning_rate": 2.45307412581881e-05, "loss": 0.8406, "step": 223660 }, { "epoch": 3.9268596709914148, "grad_norm": 0.07085143830001511, "learning_rate": 2.4523664243049946e-05, "loss": 0.8425, "step": 223670 }, { "epoch": 3.9270352358714162, "grad_norm": 0.047388456791911615, "learning_rate": 2.4516588196881633e-05, "loss": 0.8397, "step": 223680 }, { "epoch": 3.9272108007514177, "grad_norm": 0.14969775743968985, "learning_rate": 2.450951311977279e-05, "loss": 0.8494, "step": 223690 }, { "epoch": 3.927386365631419, "grad_norm": 0.046005296742803785, "learning_rate": 2.4502439011813065e-05, "loss": 0.8457, "step": 223700 }, { "epoch": 3.9275619305114207, "grad_norm": 0.06509808326749875, "learning_rate": 2.4495365873092136e-05, "loss": 0.8426, "step": 223710 }, { "epoch": 3.9277374953914217, "grad_norm": 0.05439897358312565, "learning_rate": 2.4488293703699514e-05, "loss": 0.8385, "step": 223720 }, { "epoch": 3.927913060271423, "grad_norm": 0.04938063441280698, "learning_rate": 2.448122250372494e-05, "loss": 0.8321, "step": 223730 }, { "epoch": 3.9280886251514247, "grad_norm": 0.06329698046664364, "learning_rate": 2.447415227325791e-05, "loss": 0.8368, "step": 223740 }, { "epoch": 3.928264190031426, "grad_norm": 0.06473332257704978, "learning_rate": 2.4467083012388072e-05, "loss": 0.8361, "step": 223750 }, { "epoch": 3.9284397549114276, "grad_norm": 0.1228807809432291, "learning_rate": 2.4460014721204927e-05, "loss": 0.8334, "step": 223760 }, { "epoch": 3.928615319791429, "grad_norm": 0.04672775553937674, "learning_rate": 2.445294739979806e-05, "loss": 0.8387, "step": 223770 }, { "epoch": 3.9287908846714306, "grad_norm": 0.05680709356047684, "learning_rate": 2.4445881048257057e-05, "loss": 0.8355, "step": 223780 }, { "epoch": 3.9289664495514316, "grad_norm": 0.07064693709782413, "learning_rate": 2.4438815666671377e-05, "loss": 0.8332, "step": 223790 }, { "epoch": 3.929142014431433, "grad_norm": 0.049232702142001795, "learning_rate": 2.4431751255130574e-05, "loss": 0.8392, "step": 223800 }, { "epoch": 3.9293175793114345, "grad_norm": 0.049183572633600785, "learning_rate": 2.442468781372419e-05, "loss": 0.8424, "step": 223810 }, { "epoch": 3.929493144191436, "grad_norm": 0.06007952204474032, "learning_rate": 2.441762534254166e-05, "loss": 0.8355, "step": 223820 }, { "epoch": 3.9296687090714375, "grad_norm": 0.05208452318674892, "learning_rate": 2.441056384167248e-05, "loss": 0.8394, "step": 223830 }, { "epoch": 3.9298442739514385, "grad_norm": 0.04986974777271606, "learning_rate": 2.4403503311206152e-05, "loss": 0.839, "step": 223840 }, { "epoch": 3.93001983883144, "grad_norm": 0.05326906435999666, "learning_rate": 2.439644375123214e-05, "loss": 0.8391, "step": 223850 }, { "epoch": 3.9301954037114415, "grad_norm": 0.048317015352379554, "learning_rate": 2.438938516183983e-05, "loss": 0.844, "step": 223860 }, { "epoch": 3.930370968591443, "grad_norm": 0.043541265757232224, "learning_rate": 2.4382327543118707e-05, "loss": 0.8408, "step": 223870 }, { "epoch": 3.9305465334714444, "grad_norm": 0.05457252810915343, "learning_rate": 2.4375270895158196e-05, "loss": 0.8426, "step": 223880 }, { "epoch": 3.930722098351446, "grad_norm": 0.06967359845156232, "learning_rate": 2.436821521804763e-05, "loss": 0.8372, "step": 223890 }, { "epoch": 3.9308976632314474, "grad_norm": 0.05559620310266747, "learning_rate": 2.436116051187653e-05, "loss": 0.8374, "step": 223900 }, { "epoch": 3.9310732281114484, "grad_norm": 0.04502612380973581, "learning_rate": 2.4354106776734174e-05, "loss": 0.843, "step": 223910 }, { "epoch": 3.93124879299145, "grad_norm": 0.058635889520780624, "learning_rate": 2.434705401271e-05, "loss": 0.8401, "step": 223920 }, { "epoch": 3.9314243578714514, "grad_norm": 0.04492718066042652, "learning_rate": 2.4340002219893334e-05, "loss": 0.8386, "step": 223930 }, { "epoch": 3.931599922751453, "grad_norm": 0.04355234583555915, "learning_rate": 2.4332951398373515e-05, "loss": 0.8304, "step": 223940 }, { "epoch": 3.9317754876314543, "grad_norm": 0.04444816717927564, "learning_rate": 2.4325901548239927e-05, "loss": 0.8432, "step": 223950 }, { "epoch": 3.9319510525114554, "grad_norm": 0.06308421271011419, "learning_rate": 2.4318852669581836e-05, "loss": 0.8363, "step": 223960 }, { "epoch": 3.932126617391457, "grad_norm": 0.0723679598270689, "learning_rate": 2.431180476248857e-05, "loss": 0.8388, "step": 223970 }, { "epoch": 3.9323021822714583, "grad_norm": 0.04954448325688299, "learning_rate": 2.430475782704947e-05, "loss": 0.834, "step": 223980 }, { "epoch": 3.93247774715146, "grad_norm": 0.04863550780569379, "learning_rate": 2.4297711863353757e-05, "loss": 0.8366, "step": 223990 }, { "epoch": 3.9326533120314613, "grad_norm": 0.05234502697807503, "learning_rate": 2.4290666871490737e-05, "loss": 0.8352, "step": 224000 }, { "epoch": 3.9328288769114628, "grad_norm": 0.05402449194254526, "learning_rate": 2.4283622851549665e-05, "loss": 0.8413, "step": 224010 }, { "epoch": 3.9330044417914642, "grad_norm": 0.04805542559108572, "learning_rate": 2.427657980361982e-05, "loss": 0.8442, "step": 224020 }, { "epoch": 3.9331800066714653, "grad_norm": 0.0518576905712402, "learning_rate": 2.426953772779039e-05, "loss": 0.8381, "step": 224030 }, { "epoch": 3.9333555715514668, "grad_norm": 0.05210529228102218, "learning_rate": 2.4262496624150623e-05, "loss": 0.8364, "step": 224040 }, { "epoch": 3.9335311364314682, "grad_norm": 0.06177317130231797, "learning_rate": 2.425545649278975e-05, "loss": 0.8385, "step": 224050 }, { "epoch": 3.9337067013114697, "grad_norm": 0.07529380083206047, "learning_rate": 2.4248417333796938e-05, "loss": 0.8346, "step": 224060 }, { "epoch": 3.933882266191471, "grad_norm": 0.0672219959658823, "learning_rate": 2.424137914726137e-05, "loss": 0.8335, "step": 224070 }, { "epoch": 3.9340578310714722, "grad_norm": 0.04438231908397419, "learning_rate": 2.4234341933272256e-05, "loss": 0.8422, "step": 224080 }, { "epoch": 3.934233395951474, "grad_norm": 0.07131112844252073, "learning_rate": 2.4227305691918753e-05, "loss": 0.8412, "step": 224090 }, { "epoch": 3.934408960831475, "grad_norm": 0.05185847460707317, "learning_rate": 2.4220270423289988e-05, "loss": 0.8411, "step": 224100 }, { "epoch": 3.9345845257114767, "grad_norm": 0.05092729042862933, "learning_rate": 2.42132361274751e-05, "loss": 0.8386, "step": 224110 }, { "epoch": 3.934760090591478, "grad_norm": 0.0469894209407835, "learning_rate": 2.420620280456326e-05, "loss": 0.847, "step": 224120 }, { "epoch": 3.9349356554714796, "grad_norm": 0.06310715101920093, "learning_rate": 2.4199170454643523e-05, "loss": 0.8363, "step": 224130 }, { "epoch": 3.935111220351481, "grad_norm": 0.053234016051740156, "learning_rate": 2.419213907780501e-05, "loss": 0.8474, "step": 224140 }, { "epoch": 3.935286785231482, "grad_norm": 0.056581789182372104, "learning_rate": 2.4185108674136845e-05, "loss": 0.8387, "step": 224150 }, { "epoch": 3.9354623501114836, "grad_norm": 0.0703410237965118, "learning_rate": 2.417807924372804e-05, "loss": 0.8308, "step": 224160 }, { "epoch": 3.935637914991485, "grad_norm": 0.044095131822475216, "learning_rate": 2.41710507866677e-05, "loss": 0.8445, "step": 224170 }, { "epoch": 3.9358134798714866, "grad_norm": 0.05128925858956376, "learning_rate": 2.4164023303044872e-05, "loss": 0.8452, "step": 224180 }, { "epoch": 3.935989044751488, "grad_norm": 0.05506190554991513, "learning_rate": 2.415699679294862e-05, "loss": 0.8372, "step": 224190 }, { "epoch": 3.936164609631489, "grad_norm": 0.05524985307123473, "learning_rate": 2.4149971256467917e-05, "loss": 0.8365, "step": 224200 }, { "epoch": 3.936340174511491, "grad_norm": 0.048725493016499186, "learning_rate": 2.4142946693691803e-05, "loss": 0.8483, "step": 224210 }, { "epoch": 3.936515739391492, "grad_norm": 0.040402586094626054, "learning_rate": 2.4135923104709325e-05, "loss": 0.843, "step": 224220 }, { "epoch": 3.9366913042714935, "grad_norm": 0.0644290195135227, "learning_rate": 2.412890048960939e-05, "loss": 0.8368, "step": 224230 }, { "epoch": 3.936866869151495, "grad_norm": 0.0553975343681094, "learning_rate": 2.4121878848481025e-05, "loss": 0.8362, "step": 224240 }, { "epoch": 3.9370424340314965, "grad_norm": 0.046753908509745534, "learning_rate": 2.411485818141319e-05, "loss": 0.8371, "step": 224250 }, { "epoch": 3.937217998911498, "grad_norm": 0.057076619384453395, "learning_rate": 2.4107838488494865e-05, "loss": 0.8411, "step": 224260 }, { "epoch": 3.937393563791499, "grad_norm": 0.12401009607673923, "learning_rate": 2.410081976981493e-05, "loss": 0.8429, "step": 224270 }, { "epoch": 3.9375691286715004, "grad_norm": 0.06687806341301054, "learning_rate": 2.4093802025462344e-05, "loss": 0.842, "step": 224280 }, { "epoch": 3.937744693551502, "grad_norm": 0.06059138225631249, "learning_rate": 2.4086785255526062e-05, "loss": 0.844, "step": 224290 }, { "epoch": 3.9379202584315034, "grad_norm": 0.05168650466024851, "learning_rate": 2.4079769460094912e-05, "loss": 0.8283, "step": 224300 }, { "epoch": 3.938095823311505, "grad_norm": 0.049303920423355775, "learning_rate": 2.4072754639257837e-05, "loss": 0.8371, "step": 224310 }, { "epoch": 3.938271388191506, "grad_norm": 0.05676106125255165, "learning_rate": 2.4065740793103727e-05, "loss": 0.8447, "step": 224320 }, { "epoch": 3.938446953071508, "grad_norm": 0.05198962897004642, "learning_rate": 2.4058727921721393e-05, "loss": 0.8424, "step": 224330 }, { "epoch": 3.938622517951509, "grad_norm": 0.04804131022060601, "learning_rate": 2.405171602519972e-05, "loss": 0.8382, "step": 224340 }, { "epoch": 3.9387980828315103, "grad_norm": 0.07439879210683698, "learning_rate": 2.404470510362756e-05, "loss": 0.8469, "step": 224350 }, { "epoch": 3.938973647711512, "grad_norm": 0.057092042861448665, "learning_rate": 2.4037695157093764e-05, "loss": 0.8519, "step": 224360 }, { "epoch": 3.9391492125915133, "grad_norm": 0.07076730944604544, "learning_rate": 2.403068618568709e-05, "loss": 0.8382, "step": 224370 }, { "epoch": 3.9393247774715148, "grad_norm": 0.04069287065214631, "learning_rate": 2.402367818949637e-05, "loss": 0.8395, "step": 224380 }, { "epoch": 3.939500342351516, "grad_norm": 0.05485048636218767, "learning_rate": 2.4016671168610427e-05, "loss": 0.8406, "step": 224390 }, { "epoch": 3.9396759072315173, "grad_norm": 0.04633185227112773, "learning_rate": 2.400966512311799e-05, "loss": 0.8441, "step": 224400 }, { "epoch": 3.9398514721115188, "grad_norm": 0.0557285575178913, "learning_rate": 2.400266005310785e-05, "loss": 0.8397, "step": 224410 }, { "epoch": 3.9400270369915202, "grad_norm": 0.046156037465429046, "learning_rate": 2.3995655958668757e-05, "loss": 0.8434, "step": 224420 }, { "epoch": 3.9402026018715217, "grad_norm": 0.059985935508809166, "learning_rate": 2.3988652839889493e-05, "loss": 0.8339, "step": 224430 }, { "epoch": 3.9403781667515227, "grad_norm": 0.05016818211120627, "learning_rate": 2.398165069685874e-05, "loss": 0.8374, "step": 224440 }, { "epoch": 3.9405537316315247, "grad_norm": 0.04968732113795546, "learning_rate": 2.3974649529665226e-05, "loss": 0.8428, "step": 224450 }, { "epoch": 3.9407292965115257, "grad_norm": 0.04317397619716404, "learning_rate": 2.3967649338397684e-05, "loss": 0.8358, "step": 224460 }, { "epoch": 3.940904861391527, "grad_norm": 0.04648251506646281, "learning_rate": 2.3960650123144766e-05, "loss": 0.8346, "step": 224470 }, { "epoch": 3.9410804262715287, "grad_norm": 0.06623392657144216, "learning_rate": 2.3953651883995175e-05, "loss": 0.8367, "step": 224480 }, { "epoch": 3.94125599115153, "grad_norm": 0.07177572560274494, "learning_rate": 2.394665462103761e-05, "loss": 0.8493, "step": 224490 }, { "epoch": 3.9414315560315316, "grad_norm": 0.048518738869775264, "learning_rate": 2.393965833436066e-05, "loss": 0.8437, "step": 224500 }, { "epoch": 3.9416071209115326, "grad_norm": 0.04831438592413749, "learning_rate": 2.3932663024053026e-05, "loss": 0.8406, "step": 224510 }, { "epoch": 3.941782685791534, "grad_norm": 0.05846614966278079, "learning_rate": 2.3925668690203316e-05, "loss": 0.8353, "step": 224520 }, { "epoch": 3.9419582506715356, "grad_norm": 0.068675400877912, "learning_rate": 2.391867533290017e-05, "loss": 0.8431, "step": 224530 }, { "epoch": 3.942133815551537, "grad_norm": 0.06320666224647879, "learning_rate": 2.3911682952232163e-05, "loss": 0.8399, "step": 224540 }, { "epoch": 3.9423093804315386, "grad_norm": 0.057210744658155226, "learning_rate": 2.3904691548287903e-05, "loss": 0.8399, "step": 224550 }, { "epoch": 3.94248494531154, "grad_norm": 0.05391059429535026, "learning_rate": 2.389770112115601e-05, "loss": 0.8406, "step": 224560 }, { "epoch": 3.9426605101915415, "grad_norm": 0.05395559766949972, "learning_rate": 2.389071167092499e-05, "loss": 0.8428, "step": 224570 }, { "epoch": 3.9428360750715425, "grad_norm": 0.046366601196112495, "learning_rate": 2.388372319768343e-05, "loss": 0.8401, "step": 224580 }, { "epoch": 3.943011639951544, "grad_norm": 0.06418005373784866, "learning_rate": 2.387673570151987e-05, "loss": 0.8401, "step": 224590 }, { "epoch": 3.9431872048315455, "grad_norm": 0.04845294425421254, "learning_rate": 2.386974918252288e-05, "loss": 0.8412, "step": 224600 }, { "epoch": 3.943362769711547, "grad_norm": 0.049253453459686916, "learning_rate": 2.3862763640780926e-05, "loss": 0.8458, "step": 224610 }, { "epoch": 3.9435383345915485, "grad_norm": 0.051672303299260064, "learning_rate": 2.3855779076382543e-05, "loss": 0.8401, "step": 224620 }, { "epoch": 3.9437138994715495, "grad_norm": 0.07562344674236351, "learning_rate": 2.3848795489416248e-05, "loss": 0.8367, "step": 224630 }, { "epoch": 3.943889464351551, "grad_norm": 0.048478626739089585, "learning_rate": 2.384181287997047e-05, "loss": 0.842, "step": 224640 }, { "epoch": 3.9440650292315524, "grad_norm": 0.06210288773760766, "learning_rate": 2.3834831248133707e-05, "loss": 0.838, "step": 224650 }, { "epoch": 3.944240594111554, "grad_norm": 0.04665486008408933, "learning_rate": 2.3827850593994457e-05, "loss": 0.8405, "step": 224660 }, { "epoch": 3.9444161589915554, "grad_norm": 0.061912702848381435, "learning_rate": 2.38208709176411e-05, "loss": 0.8399, "step": 224670 }, { "epoch": 3.944591723871557, "grad_norm": 0.043842179393565495, "learning_rate": 2.3813892219162105e-05, "loss": 0.8314, "step": 224680 }, { "epoch": 3.9447672887515584, "grad_norm": 0.04749931174912437, "learning_rate": 2.3806914498645873e-05, "loss": 0.8434, "step": 224690 }, { "epoch": 3.9449428536315594, "grad_norm": 0.055888787498836356, "learning_rate": 2.3799937756180874e-05, "loss": 0.8421, "step": 224700 }, { "epoch": 3.945118418511561, "grad_norm": 0.04921552296985843, "learning_rate": 2.379296199185542e-05, "loss": 0.8376, "step": 224710 }, { "epoch": 3.9452939833915623, "grad_norm": 0.04738319084311507, "learning_rate": 2.3785987205757937e-05, "loss": 0.8375, "step": 224720 }, { "epoch": 3.945469548271564, "grad_norm": 0.052032268133969856, "learning_rate": 2.3779013397976817e-05, "loss": 0.8393, "step": 224730 }, { "epoch": 3.9456451131515653, "grad_norm": 0.054983528357267185, "learning_rate": 2.377204056860037e-05, "loss": 0.8309, "step": 224740 }, { "epoch": 3.9458206780315663, "grad_norm": 0.06575384920093152, "learning_rate": 2.376506871771697e-05, "loss": 0.84, "step": 224750 }, { "epoch": 3.945996242911568, "grad_norm": 0.05502459380931436, "learning_rate": 2.3758097845414952e-05, "loss": 0.8418, "step": 224760 }, { "epoch": 3.9461718077915693, "grad_norm": 0.04698816824625595, "learning_rate": 2.3751127951782677e-05, "loss": 0.8429, "step": 224770 }, { "epoch": 3.9463473726715708, "grad_norm": 0.06837682643443004, "learning_rate": 2.3744159036908376e-05, "loss": 0.8384, "step": 224780 }, { "epoch": 3.9465229375515722, "grad_norm": 0.049644604669017614, "learning_rate": 2.373719110088038e-05, "loss": 0.8332, "step": 224790 }, { "epoch": 3.9466985024315737, "grad_norm": 0.05669255780967697, "learning_rate": 2.3730224143787022e-05, "loss": 0.8355, "step": 224800 }, { "epoch": 3.946874067311575, "grad_norm": 0.06456288139721122, "learning_rate": 2.37232581657165e-05, "loss": 0.8337, "step": 224810 }, { "epoch": 3.9470496321915762, "grad_norm": 0.06307850878788412, "learning_rate": 2.37162931667571e-05, "loss": 0.8391, "step": 224820 }, { "epoch": 3.9472251970715777, "grad_norm": 0.06912312947790923, "learning_rate": 2.370932914699712e-05, "loss": 0.8357, "step": 224830 }, { "epoch": 3.947400761951579, "grad_norm": 0.049213703689543115, "learning_rate": 2.3702366106524716e-05, "loss": 0.8438, "step": 224840 }, { "epoch": 3.9475763268315807, "grad_norm": 0.05753624121901695, "learning_rate": 2.3695404045428137e-05, "loss": 0.8439, "step": 224850 }, { "epoch": 3.947751891711582, "grad_norm": 0.048883732112584746, "learning_rate": 2.3688442963795616e-05, "loss": 0.8364, "step": 224860 }, { "epoch": 3.947927456591583, "grad_norm": 0.05227169470510754, "learning_rate": 2.3681482861715366e-05, "loss": 0.8339, "step": 224870 }, { "epoch": 3.948103021471585, "grad_norm": 0.04587854214649641, "learning_rate": 2.367452373927552e-05, "loss": 0.8343, "step": 224880 }, { "epoch": 3.948278586351586, "grad_norm": 0.049475846648960695, "learning_rate": 2.366756559656427e-05, "loss": 0.8418, "step": 224890 }, { "epoch": 3.9484541512315876, "grad_norm": 0.06701148413519502, "learning_rate": 2.3660608433669814e-05, "loss": 0.8423, "step": 224900 }, { "epoch": 3.948629716111589, "grad_norm": 0.06260232919526293, "learning_rate": 2.365365225068025e-05, "loss": 0.837, "step": 224910 }, { "epoch": 3.9488052809915906, "grad_norm": 0.06329780080211406, "learning_rate": 2.364669704768373e-05, "loss": 0.834, "step": 224920 }, { "epoch": 3.948980845871592, "grad_norm": 0.06754892195110392, "learning_rate": 2.3639742824768386e-05, "loss": 0.8516, "step": 224930 }, { "epoch": 3.949156410751593, "grad_norm": 0.05910715414148592, "learning_rate": 2.363278958202236e-05, "loss": 0.8405, "step": 224940 }, { "epoch": 3.9493319756315945, "grad_norm": 0.06515496981216616, "learning_rate": 2.3625837319533677e-05, "loss": 0.8309, "step": 224950 }, { "epoch": 3.949507540511596, "grad_norm": 0.041502391121396975, "learning_rate": 2.3618886037390474e-05, "loss": 0.8367, "step": 224960 }, { "epoch": 3.9496831053915975, "grad_norm": 0.05000038999335869, "learning_rate": 2.3611935735680836e-05, "loss": 0.8378, "step": 224970 }, { "epoch": 3.949858670271599, "grad_norm": 0.053879330042811216, "learning_rate": 2.3604986414492774e-05, "loss": 0.8378, "step": 224980 }, { "epoch": 3.9500342351516, "grad_norm": 0.07346493553644391, "learning_rate": 2.3598038073914376e-05, "loss": 0.8378, "step": 224990 }, { "epoch": 3.950209800031602, "grad_norm": 0.04840284641736829, "learning_rate": 2.3591090714033695e-05, "loss": 0.8289, "step": 225000 }, { "epoch": 3.950385364911603, "grad_norm": 0.04353469045987646, "learning_rate": 2.3584144334938695e-05, "loss": 0.8436, "step": 225010 }, { "epoch": 3.9505609297916044, "grad_norm": 0.060955053933933905, "learning_rate": 2.3577198936717432e-05, "loss": 0.8358, "step": 225020 }, { "epoch": 3.950736494671606, "grad_norm": 0.04854924015224883, "learning_rate": 2.3570254519457884e-05, "loss": 0.8335, "step": 225030 }, { "epoch": 3.9509120595516074, "grad_norm": 0.0447376968715446, "learning_rate": 2.3563311083248094e-05, "loss": 0.8342, "step": 225040 }, { "epoch": 3.951087624431609, "grad_norm": 0.051643653905525944, "learning_rate": 2.3556368628175954e-05, "loss": 0.8403, "step": 225050 }, { "epoch": 3.95126318931161, "grad_norm": 0.05049232012012966, "learning_rate": 2.3549427154329473e-05, "loss": 0.8417, "step": 225060 }, { "epoch": 3.9514387541916114, "grad_norm": 0.05695566957856484, "learning_rate": 2.354248666179662e-05, "loss": 0.8412, "step": 225070 }, { "epoch": 3.951614319071613, "grad_norm": 0.049236166393520436, "learning_rate": 2.3535547150665278e-05, "loss": 0.844, "step": 225080 }, { "epoch": 3.9517898839516143, "grad_norm": 0.06313357509540621, "learning_rate": 2.352860862102339e-05, "loss": 0.8346, "step": 225090 }, { "epoch": 3.951965448831616, "grad_norm": 0.05510044611014629, "learning_rate": 2.352167107295889e-05, "loss": 0.8383, "step": 225100 }, { "epoch": 3.952141013711617, "grad_norm": 0.06470712392658955, "learning_rate": 2.3514734506559685e-05, "loss": 0.8373, "step": 225110 }, { "epoch": 3.952316578591619, "grad_norm": 0.05203772168287974, "learning_rate": 2.3507798921913614e-05, "loss": 0.8375, "step": 225120 }, { "epoch": 3.95249214347162, "grad_norm": 0.05403772098405379, "learning_rate": 2.3500864319108593e-05, "loss": 0.8343, "step": 225130 }, { "epoch": 3.9526677083516213, "grad_norm": 0.045242162580193056, "learning_rate": 2.34939306982325e-05, "loss": 0.8435, "step": 225140 }, { "epoch": 3.9528432732316228, "grad_norm": 0.051923852448948815, "learning_rate": 2.3486998059373122e-05, "loss": 0.8344, "step": 225150 }, { "epoch": 3.9530188381116242, "grad_norm": 0.057886710015134187, "learning_rate": 2.348006640261834e-05, "loss": 0.837, "step": 225160 }, { "epoch": 3.9531944029916257, "grad_norm": 0.04705839178828937, "learning_rate": 2.3473135728056e-05, "loss": 0.8546, "step": 225170 }, { "epoch": 3.9533699678716268, "grad_norm": 0.04783437126401676, "learning_rate": 2.3466206035773866e-05, "loss": 0.8435, "step": 225180 }, { "epoch": 3.9535455327516282, "grad_norm": 0.07179909373520822, "learning_rate": 2.3459277325859753e-05, "loss": 0.8414, "step": 225190 }, { "epoch": 3.9537210976316297, "grad_norm": 0.04217915594854657, "learning_rate": 2.3452349598401465e-05, "loss": 0.8436, "step": 225200 }, { "epoch": 3.953896662511631, "grad_norm": 0.0638730192624945, "learning_rate": 2.3445422853486794e-05, "loss": 0.8439, "step": 225210 }, { "epoch": 3.9540722273916327, "grad_norm": 0.04682166556422792, "learning_rate": 2.3438497091203444e-05, "loss": 0.841, "step": 225220 }, { "epoch": 3.954247792271634, "grad_norm": 0.06905096161551508, "learning_rate": 2.3431572311639204e-05, "loss": 0.8331, "step": 225230 }, { "epoch": 3.9544233571516356, "grad_norm": 0.059520755992303935, "learning_rate": 2.342464851488184e-05, "loss": 0.8419, "step": 225240 }, { "epoch": 3.9545989220316367, "grad_norm": 0.05540182845579542, "learning_rate": 2.3417725701019015e-05, "loss": 0.8376, "step": 225250 }, { "epoch": 3.954774486911638, "grad_norm": 0.0420725183467415, "learning_rate": 2.3410803870138472e-05, "loss": 0.833, "step": 225260 }, { "epoch": 3.9549500517916396, "grad_norm": 0.06379193013100236, "learning_rate": 2.3403883022327926e-05, "loss": 0.8444, "step": 225270 }, { "epoch": 3.955125616671641, "grad_norm": 0.04924774012722095, "learning_rate": 2.3396963157675073e-05, "loss": 0.837, "step": 225280 }, { "epoch": 3.9553011815516426, "grad_norm": 0.0616076780021955, "learning_rate": 2.3390044276267543e-05, "loss": 0.8466, "step": 225290 }, { "epoch": 3.9554767464316436, "grad_norm": 0.051607132087945075, "learning_rate": 2.3383126378193015e-05, "loss": 0.8439, "step": 225300 }, { "epoch": 3.955652311311645, "grad_norm": 0.060178467303109946, "learning_rate": 2.337620946353919e-05, "loss": 0.8474, "step": 225310 }, { "epoch": 3.9558278761916466, "grad_norm": 0.04065689051682223, "learning_rate": 2.3369293532393642e-05, "loss": 0.8451, "step": 225320 }, { "epoch": 3.956003441071648, "grad_norm": 0.049374929055085656, "learning_rate": 2.3362378584844014e-05, "loss": 0.8431, "step": 225330 }, { "epoch": 3.9561790059516495, "grad_norm": 0.05401239693261681, "learning_rate": 2.335546462097797e-05, "loss": 0.834, "step": 225340 }, { "epoch": 3.956354570831651, "grad_norm": 0.04038480289425001, "learning_rate": 2.3348551640883032e-05, "loss": 0.8475, "step": 225350 }, { "epoch": 3.9565301357116525, "grad_norm": 0.051792679062727776, "learning_rate": 2.334163964464682e-05, "loss": 0.8346, "step": 225360 }, { "epoch": 3.9567057005916535, "grad_norm": 0.07478411283900097, "learning_rate": 2.3334728632356925e-05, "loss": 0.8435, "step": 225370 }, { "epoch": 3.956881265471655, "grad_norm": 0.06219660307306123, "learning_rate": 2.3327818604100933e-05, "loss": 0.8334, "step": 225380 }, { "epoch": 3.9570568303516565, "grad_norm": 0.042181317765587686, "learning_rate": 2.3320909559966333e-05, "loss": 0.8395, "step": 225390 }, { "epoch": 3.957232395231658, "grad_norm": 0.049165429798655604, "learning_rate": 2.3314001500040693e-05, "loss": 0.8317, "step": 225400 }, { "epoch": 3.9574079601116594, "grad_norm": 0.0593172415425077, "learning_rate": 2.3307094424411573e-05, "loss": 0.8369, "step": 225410 }, { "epoch": 3.9575835249916604, "grad_norm": 0.04505635264516648, "learning_rate": 2.3300188333166435e-05, "loss": 0.8498, "step": 225420 }, { "epoch": 3.957759089871662, "grad_norm": 0.05300548871726665, "learning_rate": 2.32932832263928e-05, "loss": 0.8424, "step": 225430 }, { "epoch": 3.9579346547516634, "grad_norm": 0.05111450859715031, "learning_rate": 2.328637910417815e-05, "loss": 0.8387, "step": 225440 }, { "epoch": 3.958110219631665, "grad_norm": 0.051273390350459025, "learning_rate": 2.327947596661001e-05, "loss": 0.8393, "step": 225450 }, { "epoch": 3.9582857845116663, "grad_norm": 0.04667941341002912, "learning_rate": 2.3272573813775765e-05, "loss": 0.8381, "step": 225460 }, { "epoch": 3.958461349391668, "grad_norm": 0.05198671493753532, "learning_rate": 2.3265672645762916e-05, "loss": 0.8307, "step": 225470 }, { "epoch": 3.9586369142716693, "grad_norm": 0.06288627013909785, "learning_rate": 2.325877246265891e-05, "loss": 0.8307, "step": 225480 }, { "epoch": 3.9588124791516703, "grad_norm": 0.05099434428653766, "learning_rate": 2.3251873264551144e-05, "loss": 0.8334, "step": 225490 }, { "epoch": 3.958988044031672, "grad_norm": 0.06215934327510436, "learning_rate": 2.3244975051527024e-05, "loss": 0.8401, "step": 225500 }, { "epoch": 3.9591636089116733, "grad_norm": 0.04317158311767044, "learning_rate": 2.3238077823674012e-05, "loss": 0.8406, "step": 225510 }, { "epoch": 3.9593391737916748, "grad_norm": 0.06692603823672869, "learning_rate": 2.323118158107943e-05, "loss": 0.8415, "step": 225520 }, { "epoch": 3.9595147386716762, "grad_norm": 0.0623700518599184, "learning_rate": 2.322428632383068e-05, "loss": 0.8376, "step": 225530 }, { "epoch": 3.9596903035516773, "grad_norm": 0.044918763334732846, "learning_rate": 2.321739205201513e-05, "loss": 0.8496, "step": 225540 }, { "epoch": 3.959865868431679, "grad_norm": 0.04524257608110994, "learning_rate": 2.3210498765720158e-05, "loss": 0.8289, "step": 225550 }, { "epoch": 3.9600414333116802, "grad_norm": 0.05476486599744425, "learning_rate": 2.320360646503303e-05, "loss": 0.8347, "step": 225560 }, { "epoch": 3.9602169981916817, "grad_norm": 0.05653042211268956, "learning_rate": 2.319671515004114e-05, "loss": 0.8322, "step": 225570 }, { "epoch": 3.960392563071683, "grad_norm": 0.06011296293197752, "learning_rate": 2.3189824820831797e-05, "loss": 0.8355, "step": 225580 }, { "epoch": 3.9605681279516847, "grad_norm": 0.048529690788469734, "learning_rate": 2.3182935477492256e-05, "loss": 0.8361, "step": 225590 }, { "epoch": 3.960743692831686, "grad_norm": 0.04369726379883062, "learning_rate": 2.3176047120109846e-05, "loss": 0.8399, "step": 225600 }, { "epoch": 3.960919257711687, "grad_norm": 0.05875493691745331, "learning_rate": 2.316915974877183e-05, "loss": 0.8418, "step": 225610 }, { "epoch": 3.9610948225916887, "grad_norm": 0.04850909848446647, "learning_rate": 2.3162273363565494e-05, "loss": 0.8416, "step": 225620 }, { "epoch": 3.96127038747169, "grad_norm": 0.05305197394927158, "learning_rate": 2.3155387964578057e-05, "loss": 0.8411, "step": 225630 }, { "epoch": 3.9614459523516916, "grad_norm": 0.05144439003435167, "learning_rate": 2.3148503551896763e-05, "loss": 0.8418, "step": 225640 }, { "epoch": 3.961621517231693, "grad_norm": 0.06314365854403091, "learning_rate": 2.3141620125608888e-05, "loss": 0.8404, "step": 225650 }, { "epoch": 3.961797082111694, "grad_norm": 0.05508293084541895, "learning_rate": 2.3134737685801576e-05, "loss": 0.8399, "step": 225660 }, { "epoch": 3.961972646991696, "grad_norm": 0.04644905192659217, "learning_rate": 2.3127856232562056e-05, "loss": 0.8362, "step": 225670 }, { "epoch": 3.962148211871697, "grad_norm": 0.04960314415503474, "learning_rate": 2.3120975765977555e-05, "loss": 0.847, "step": 225680 }, { "epoch": 3.9623237767516986, "grad_norm": 0.05440822991071069, "learning_rate": 2.3114096286135185e-05, "loss": 0.8342, "step": 225690 }, { "epoch": 3.9624993416317, "grad_norm": 0.04911120295128656, "learning_rate": 2.3107217793122134e-05, "loss": 0.8344, "step": 225700 }, { "epoch": 3.9626749065117015, "grad_norm": 0.054306748731680586, "learning_rate": 2.3100340287025567e-05, "loss": 0.8458, "step": 225710 }, { "epoch": 3.962850471391703, "grad_norm": 0.046142468181516724, "learning_rate": 2.309346376793265e-05, "loss": 0.8405, "step": 225720 }, { "epoch": 3.963026036271704, "grad_norm": 0.06213790586703322, "learning_rate": 2.3086588235930457e-05, "loss": 0.839, "step": 225730 }, { "epoch": 3.9632016011517055, "grad_norm": 0.05806237269574725, "learning_rate": 2.307971369110611e-05, "loss": 0.8384, "step": 225740 }, { "epoch": 3.963377166031707, "grad_norm": 0.05165416410915949, "learning_rate": 2.3072840133546755e-05, "loss": 0.8367, "step": 225750 }, { "epoch": 3.9635527309117085, "grad_norm": 0.04647062367936725, "learning_rate": 2.3065967563339427e-05, "loss": 0.8393, "step": 225760 }, { "epoch": 3.96372829579171, "grad_norm": 0.05269270487894632, "learning_rate": 2.3059095980571222e-05, "loss": 0.8409, "step": 225770 }, { "epoch": 3.963903860671711, "grad_norm": 0.04399231996183998, "learning_rate": 2.3052225385329214e-05, "loss": 0.8361, "step": 225780 }, { "epoch": 3.964079425551713, "grad_norm": 0.048290862896502744, "learning_rate": 2.3045355777700463e-05, "loss": 0.8326, "step": 225790 }, { "epoch": 3.964254990431714, "grad_norm": 0.043381131159501686, "learning_rate": 2.3038487157771986e-05, "loss": 0.8349, "step": 225800 }, { "epoch": 3.9644305553117154, "grad_norm": 0.04975475729327939, "learning_rate": 2.3031619525630802e-05, "loss": 0.8408, "step": 225810 }, { "epoch": 3.964606120191717, "grad_norm": 0.05145619193170061, "learning_rate": 2.302475288136398e-05, "loss": 0.8403, "step": 225820 }, { "epoch": 3.9647816850717184, "grad_norm": 0.0539233215433761, "learning_rate": 2.301788722505845e-05, "loss": 0.8427, "step": 225830 }, { "epoch": 3.96495724995172, "grad_norm": 0.05076041489299348, "learning_rate": 2.301102255680122e-05, "loss": 0.8494, "step": 225840 }, { "epoch": 3.965132814831721, "grad_norm": 0.06951154600721028, "learning_rate": 2.3004158876679335e-05, "loss": 0.842, "step": 225850 }, { "epoch": 3.9653083797117223, "grad_norm": 0.0568043682715977, "learning_rate": 2.2997296184779655e-05, "loss": 0.8347, "step": 225860 }, { "epoch": 3.965483944591724, "grad_norm": 0.0762659302176215, "learning_rate": 2.299043448118919e-05, "loss": 0.8368, "step": 225870 }, { "epoch": 3.9656595094717253, "grad_norm": 0.05090812348334133, "learning_rate": 2.2983573765994877e-05, "loss": 0.8367, "step": 225880 }, { "epoch": 3.9658350743517268, "grad_norm": 0.0744969642370656, "learning_rate": 2.2976714039283655e-05, "loss": 0.8316, "step": 225890 }, { "epoch": 3.966010639231728, "grad_norm": 0.04816313560943294, "learning_rate": 2.2969855301142397e-05, "loss": 0.8365, "step": 225900 }, { "epoch": 3.9661862041117297, "grad_norm": 0.04096872107683798, "learning_rate": 2.296299755165802e-05, "loss": 0.8473, "step": 225910 }, { "epoch": 3.9663617689917308, "grad_norm": 0.04760117685538072, "learning_rate": 2.295614079091746e-05, "loss": 0.8468, "step": 225920 }, { "epoch": 3.9665373338717322, "grad_norm": 0.0514970219977188, "learning_rate": 2.2949285019007517e-05, "loss": 0.8416, "step": 225930 }, { "epoch": 3.9667128987517337, "grad_norm": 0.05104448249231461, "learning_rate": 2.294243023601509e-05, "loss": 0.8345, "step": 225940 }, { "epoch": 3.966888463631735, "grad_norm": 0.04870001931343752, "learning_rate": 2.2935576442027026e-05, "loss": 0.8419, "step": 225950 }, { "epoch": 3.9670640285117367, "grad_norm": 0.04963396392310607, "learning_rate": 2.2928723637130206e-05, "loss": 0.8473, "step": 225960 }, { "epoch": 3.9672395933917377, "grad_norm": 0.037654005215487105, "learning_rate": 2.2921871821411392e-05, "loss": 0.8397, "step": 225970 }, { "epoch": 3.967415158271739, "grad_norm": 0.047628208381196024, "learning_rate": 2.2915020994957424e-05, "loss": 0.8414, "step": 225980 }, { "epoch": 3.9675907231517407, "grad_norm": 0.05544774623214029, "learning_rate": 2.2908171157855133e-05, "loss": 0.841, "step": 225990 }, { "epoch": 3.967766288031742, "grad_norm": 0.06092133986118265, "learning_rate": 2.2901322310191252e-05, "loss": 0.8434, "step": 226000 }, { "epoch": 3.9679418529117436, "grad_norm": 0.08808143597500004, "learning_rate": 2.2894474452052588e-05, "loss": 0.851, "step": 226010 }, { "epoch": 3.968117417791745, "grad_norm": 0.058044730309808536, "learning_rate": 2.288762758352592e-05, "loss": 0.8374, "step": 226020 }, { "epoch": 3.9682929826717466, "grad_norm": 0.049579162511288355, "learning_rate": 2.2880781704697962e-05, "loss": 0.8381, "step": 226030 }, { "epoch": 3.9684685475517476, "grad_norm": 0.05747987005939569, "learning_rate": 2.2873936815655462e-05, "loss": 0.8495, "step": 226040 }, { "epoch": 3.968644112431749, "grad_norm": 0.05321774459553546, "learning_rate": 2.286709291648516e-05, "loss": 0.8396, "step": 226050 }, { "epoch": 3.9688196773117506, "grad_norm": 0.05674652664605749, "learning_rate": 2.2860250007273792e-05, "loss": 0.8376, "step": 226060 }, { "epoch": 3.968995242191752, "grad_norm": 0.08130360691728586, "learning_rate": 2.2853408088108007e-05, "loss": 0.8414, "step": 226070 }, { "epoch": 3.9691708070717535, "grad_norm": 0.06112022776402533, "learning_rate": 2.284656715907452e-05, "loss": 0.8332, "step": 226080 }, { "epoch": 3.9693463719517545, "grad_norm": 0.04543150610007141, "learning_rate": 2.2839727220260033e-05, "loss": 0.8375, "step": 226090 }, { "epoch": 3.969521936831756, "grad_norm": 0.05779857467308036, "learning_rate": 2.2832888271751144e-05, "loss": 0.8392, "step": 226100 }, { "epoch": 3.9696975017117575, "grad_norm": 0.05821896444890764, "learning_rate": 2.2826050313634557e-05, "loss": 0.8414, "step": 226110 }, { "epoch": 3.969873066591759, "grad_norm": 0.05155330135743396, "learning_rate": 2.281921334599688e-05, "loss": 0.8478, "step": 226120 }, { "epoch": 3.9700486314717605, "grad_norm": 0.09931996245028926, "learning_rate": 2.281237736892479e-05, "loss": 0.8385, "step": 226130 }, { "epoch": 3.970224196351762, "grad_norm": 0.0583234613979553, "learning_rate": 2.280554238250483e-05, "loss": 0.8423, "step": 226140 }, { "epoch": 3.9703997612317634, "grad_norm": 0.06167940754758536, "learning_rate": 2.2798708386823638e-05, "loss": 0.8472, "step": 226150 }, { "epoch": 3.9705753261117644, "grad_norm": 0.04318904904847112, "learning_rate": 2.2791875381967816e-05, "loss": 0.8437, "step": 226160 }, { "epoch": 3.970750890991766, "grad_norm": 0.0526727922047294, "learning_rate": 2.2785043368023907e-05, "loss": 0.837, "step": 226170 }, { "epoch": 3.9709264558717674, "grad_norm": 0.04802089832016558, "learning_rate": 2.2778212345078486e-05, "loss": 0.8359, "step": 226180 }, { "epoch": 3.971102020751769, "grad_norm": 0.04099142111290917, "learning_rate": 2.2771382313218124e-05, "loss": 0.8408, "step": 226190 }, { "epoch": 3.9712775856317704, "grad_norm": 0.04415385326178758, "learning_rate": 2.2764553272529317e-05, "loss": 0.833, "step": 226200 }, { "epoch": 3.9714531505117714, "grad_norm": 0.06822338137746732, "learning_rate": 2.2757725223098614e-05, "loss": 0.8478, "step": 226210 }, { "epoch": 3.971628715391773, "grad_norm": 0.05602027772896421, "learning_rate": 2.275089816501254e-05, "loss": 0.845, "step": 226220 }, { "epoch": 3.9718042802717743, "grad_norm": 0.05760572681883167, "learning_rate": 2.2744072098357596e-05, "loss": 0.8423, "step": 226230 }, { "epoch": 3.971979845151776, "grad_norm": 0.04953970877146825, "learning_rate": 2.2737247023220227e-05, "loss": 0.8333, "step": 226240 }, { "epoch": 3.9721554100317773, "grad_norm": 0.09406755921083576, "learning_rate": 2.2730422939686942e-05, "loss": 0.8388, "step": 226250 }, { "epoch": 3.972330974911779, "grad_norm": 0.052307735572449776, "learning_rate": 2.2723599847844223e-05, "loss": 0.842, "step": 226260 }, { "epoch": 3.9725065397917803, "grad_norm": 0.059341669485236274, "learning_rate": 2.271677774777848e-05, "loss": 0.8421, "step": 226270 }, { "epoch": 3.9726821046717813, "grad_norm": 0.05514423131880389, "learning_rate": 2.270995663957615e-05, "loss": 0.8449, "step": 226280 }, { "epoch": 3.9728576695517828, "grad_norm": 0.07219590867987821, "learning_rate": 2.2703136523323685e-05, "loss": 0.8365, "step": 226290 }, { "epoch": 3.9730332344317842, "grad_norm": 0.06632044946837093, "learning_rate": 2.2696317399107506e-05, "loss": 0.8417, "step": 226300 }, { "epoch": 3.9732087993117857, "grad_norm": 0.051871066281613254, "learning_rate": 2.2689499267013968e-05, "loss": 0.8438, "step": 226310 }, { "epoch": 3.973384364191787, "grad_norm": 0.04339304581261615, "learning_rate": 2.268268212712949e-05, "loss": 0.8434, "step": 226320 }, { "epoch": 3.9735599290717882, "grad_norm": 0.06119972028314861, "learning_rate": 2.2675865979540456e-05, "loss": 0.848, "step": 226330 }, { "epoch": 3.97373549395179, "grad_norm": 0.04400076654777094, "learning_rate": 2.2669050824333185e-05, "loss": 0.8417, "step": 226340 }, { "epoch": 3.973911058831791, "grad_norm": 0.0525107264846198, "learning_rate": 2.2662236661594053e-05, "loss": 0.8404, "step": 226350 }, { "epoch": 3.9740866237117927, "grad_norm": 0.07176461329430332, "learning_rate": 2.2655423491409423e-05, "loss": 0.8378, "step": 226360 }, { "epoch": 3.974262188591794, "grad_norm": 0.04759042074798834, "learning_rate": 2.264861131386556e-05, "loss": 0.8339, "step": 226370 }, { "epoch": 3.9744377534717956, "grad_norm": 0.055375307348988954, "learning_rate": 2.2641800129048814e-05, "loss": 0.8397, "step": 226380 }, { "epoch": 3.974613318351797, "grad_norm": 0.05386799268699527, "learning_rate": 2.2634989937045474e-05, "loss": 0.8412, "step": 226390 }, { "epoch": 3.974788883231798, "grad_norm": 0.06752287827360776, "learning_rate": 2.2628180737941863e-05, "loss": 0.8412, "step": 226400 }, { "epoch": 3.9749644481117996, "grad_norm": 0.055117084391553106, "learning_rate": 2.262137253182419e-05, "loss": 0.8415, "step": 226410 }, { "epoch": 3.975140012991801, "grad_norm": 0.05647062233880191, "learning_rate": 2.2614565318778755e-05, "loss": 0.8437, "step": 226420 }, { "epoch": 3.9753155778718026, "grad_norm": 0.050731061063847546, "learning_rate": 2.2607759098891823e-05, "loss": 0.8369, "step": 226430 }, { "epoch": 3.975491142751804, "grad_norm": 0.0576610907283689, "learning_rate": 2.2600953872249576e-05, "loss": 0.84, "step": 226440 }, { "epoch": 3.975666707631805, "grad_norm": 0.05629774407810526, "learning_rate": 2.2594149638938265e-05, "loss": 0.8347, "step": 226450 }, { "epoch": 3.975842272511807, "grad_norm": 0.05692489711987001, "learning_rate": 2.2587346399044115e-05, "loss": 0.8353, "step": 226460 }, { "epoch": 3.976017837391808, "grad_norm": 0.052533202782194914, "learning_rate": 2.258054415265334e-05, "loss": 0.8387, "step": 226470 }, { "epoch": 3.9761934022718095, "grad_norm": 0.047580343551097755, "learning_rate": 2.2573742899852067e-05, "loss": 0.8489, "step": 226480 }, { "epoch": 3.976368967151811, "grad_norm": 0.05894713918098907, "learning_rate": 2.2566942640726503e-05, "loss": 0.8445, "step": 226490 }, { "epoch": 3.9765445320318125, "grad_norm": 0.06589652583978228, "learning_rate": 2.2560143375362836e-05, "loss": 0.8465, "step": 226500 }, { "epoch": 3.976720096911814, "grad_norm": 0.04912885202807653, "learning_rate": 2.2553345103847154e-05, "loss": 0.8377, "step": 226510 }, { "epoch": 3.976895661791815, "grad_norm": 0.04967598928977722, "learning_rate": 2.2546547826265634e-05, "loss": 0.8412, "step": 226520 }, { "epoch": 3.9770712266718165, "grad_norm": 0.053025090462624394, "learning_rate": 2.253975154270441e-05, "loss": 0.838, "step": 226530 }, { "epoch": 3.977246791551818, "grad_norm": 0.0733917367508938, "learning_rate": 2.2532956253249538e-05, "loss": 0.8395, "step": 226540 }, { "epoch": 3.9774223564318194, "grad_norm": 0.05804177859321806, "learning_rate": 2.2526161957987156e-05, "loss": 0.8415, "step": 226550 }, { "epoch": 3.977597921311821, "grad_norm": 0.04522171746689349, "learning_rate": 2.2519368657003337e-05, "loss": 0.8473, "step": 226560 }, { "epoch": 3.977773486191822, "grad_norm": 0.05263783458610326, "learning_rate": 2.251257635038419e-05, "loss": 0.8382, "step": 226570 }, { "epoch": 3.977949051071824, "grad_norm": 0.04030167210870562, "learning_rate": 2.2505785038215704e-05, "loss": 0.8424, "step": 226580 }, { "epoch": 3.978124615951825, "grad_norm": 0.050655501568373895, "learning_rate": 2.2498994720583975e-05, "loss": 0.8396, "step": 226590 }, { "epoch": 3.9783001808318263, "grad_norm": 0.05934574735300298, "learning_rate": 2.2492205397575054e-05, "loss": 0.8443, "step": 226600 }, { "epoch": 3.978475745711828, "grad_norm": 0.04607425069909458, "learning_rate": 2.2485417069274908e-05, "loss": 0.8412, "step": 226610 }, { "epoch": 3.9786513105918293, "grad_norm": 0.05017890199837621, "learning_rate": 2.247862973576958e-05, "loss": 0.8421, "step": 226620 }, { "epoch": 3.978826875471831, "grad_norm": 0.05915294043108734, "learning_rate": 2.2471843397145056e-05, "loss": 0.8419, "step": 226630 }, { "epoch": 3.979002440351832, "grad_norm": 0.042735548323021115, "learning_rate": 2.2465058053487354e-05, "loss": 0.8353, "step": 226640 }, { "epoch": 3.9791780052318333, "grad_norm": 0.04600409985834753, "learning_rate": 2.245827370488239e-05, "loss": 0.8402, "step": 226650 }, { "epoch": 3.9793535701118348, "grad_norm": 0.05762249195205805, "learning_rate": 2.2451490351416144e-05, "loss": 0.8394, "step": 226660 }, { "epoch": 3.9795291349918362, "grad_norm": 0.0538147471147669, "learning_rate": 2.2444707993174606e-05, "loss": 0.8421, "step": 226670 }, { "epoch": 3.9797046998718377, "grad_norm": 0.06489724446999166, "learning_rate": 2.243792663024365e-05, "loss": 0.8494, "step": 226680 }, { "epoch": 3.979880264751839, "grad_norm": 0.054526430257454156, "learning_rate": 2.2431146262709205e-05, "loss": 0.8327, "step": 226690 }, { "epoch": 3.9800558296318407, "grad_norm": 0.04935380854943934, "learning_rate": 2.2424366890657233e-05, "loss": 0.8431, "step": 226700 }, { "epoch": 3.9802313945118417, "grad_norm": 0.04886860390380545, "learning_rate": 2.2417588514173557e-05, "loss": 0.8445, "step": 226710 }, { "epoch": 3.980406959391843, "grad_norm": 0.05523251629182382, "learning_rate": 2.2410811133344103e-05, "loss": 0.8456, "step": 226720 }, { "epoch": 3.9805825242718447, "grad_norm": 0.07114654957068613, "learning_rate": 2.2404034748254727e-05, "loss": 0.84, "step": 226730 }, { "epoch": 3.980758089151846, "grad_norm": 0.04412216259962399, "learning_rate": 2.239725935899132e-05, "loss": 0.8463, "step": 226740 }, { "epoch": 3.9809336540318476, "grad_norm": 0.06477963054771592, "learning_rate": 2.2390484965639686e-05, "loss": 0.8341, "step": 226750 }, { "epoch": 3.9811092189118487, "grad_norm": 0.05905944682539789, "learning_rate": 2.238371156828566e-05, "loss": 0.8345, "step": 226760 }, { "epoch": 3.98128478379185, "grad_norm": 0.0585468497602008, "learning_rate": 2.2376939167015103e-05, "loss": 0.8333, "step": 226770 }, { "epoch": 3.9814603486718516, "grad_norm": 0.05293529518075925, "learning_rate": 2.2370167761913767e-05, "loss": 0.841, "step": 226780 }, { "epoch": 3.981635913551853, "grad_norm": 0.061130676721526195, "learning_rate": 2.2363397353067488e-05, "loss": 0.8388, "step": 226790 }, { "epoch": 3.9818114784318546, "grad_norm": 0.052330863669197286, "learning_rate": 2.2356627940562023e-05, "loss": 0.8413, "step": 226800 }, { "epoch": 3.981987043311856, "grad_norm": 0.048977060569110165, "learning_rate": 2.2349859524483183e-05, "loss": 0.8412, "step": 226810 }, { "epoch": 3.9821626081918575, "grad_norm": 0.050657880300437885, "learning_rate": 2.2343092104916673e-05, "loss": 0.8324, "step": 226820 }, { "epoch": 3.9823381730718586, "grad_norm": 0.05252651624162582, "learning_rate": 2.2336325681948255e-05, "loss": 0.8443, "step": 226830 }, { "epoch": 3.98251373795186, "grad_norm": 0.05309517342250724, "learning_rate": 2.23295602556637e-05, "loss": 0.8435, "step": 226840 }, { "epoch": 3.9826893028318615, "grad_norm": 0.0517466596639959, "learning_rate": 2.232279582614865e-05, "loss": 0.8347, "step": 226850 }, { "epoch": 3.982864867711863, "grad_norm": 0.047026939750811896, "learning_rate": 2.231603239348887e-05, "loss": 0.8401, "step": 226860 }, { "epoch": 3.9830404325918645, "grad_norm": 0.05190582402197581, "learning_rate": 2.2309269957770073e-05, "loss": 0.8356, "step": 226870 }, { "epoch": 3.9832159974718655, "grad_norm": 0.05491869208987791, "learning_rate": 2.230250851907786e-05, "loss": 0.8397, "step": 226880 }, { "epoch": 3.983391562351867, "grad_norm": 0.048200231425669246, "learning_rate": 2.2295748077497957e-05, "loss": 0.8401, "step": 226890 }, { "epoch": 3.9835671272318685, "grad_norm": 0.06086131852712872, "learning_rate": 2.2288988633116013e-05, "loss": 0.8407, "step": 226900 }, { "epoch": 3.98374269211187, "grad_norm": 0.04643107966836554, "learning_rate": 2.2282230186017695e-05, "loss": 0.8433, "step": 226910 }, { "epoch": 3.9839182569918714, "grad_norm": 0.0637420371047163, "learning_rate": 2.2275472736288577e-05, "loss": 0.8368, "step": 226920 }, { "epoch": 3.984093821871873, "grad_norm": 0.05394362726078167, "learning_rate": 2.2268716284014297e-05, "loss": 0.8335, "step": 226930 }, { "epoch": 3.9842693867518744, "grad_norm": 0.04842638468783668, "learning_rate": 2.2261960829280507e-05, "loss": 0.8344, "step": 226940 }, { "epoch": 3.9844449516318754, "grad_norm": 0.05564621547371898, "learning_rate": 2.225520637217274e-05, "loss": 0.8405, "step": 226950 }, { "epoch": 3.984620516511877, "grad_norm": 0.06042874191280068, "learning_rate": 2.22484529127766e-05, "loss": 0.8409, "step": 226960 }, { "epoch": 3.9847960813918784, "grad_norm": 0.0445711983724221, "learning_rate": 2.2241700451177647e-05, "loss": 0.8414, "step": 226970 }, { "epoch": 3.98497164627188, "grad_norm": 0.04991288363773975, "learning_rate": 2.223494898746147e-05, "loss": 0.8381, "step": 226980 }, { "epoch": 3.9851472111518813, "grad_norm": 0.0549724311428672, "learning_rate": 2.2228198521713565e-05, "loss": 0.8392, "step": 226990 }, { "epoch": 3.9853227760318823, "grad_norm": 0.05238576929733188, "learning_rate": 2.2221449054019473e-05, "loss": 0.837, "step": 227000 }, { "epoch": 3.9854983409118843, "grad_norm": 0.050892611974480126, "learning_rate": 2.2214700584464757e-05, "loss": 0.8433, "step": 227010 }, { "epoch": 3.9856739057918853, "grad_norm": 0.04712938905241488, "learning_rate": 2.2207953113134843e-05, "loss": 0.8379, "step": 227020 }, { "epoch": 3.9858494706718868, "grad_norm": 0.06144109925414211, "learning_rate": 2.220120664011527e-05, "loss": 0.8386, "step": 227030 }, { "epoch": 3.9860250355518883, "grad_norm": 0.051123138552844394, "learning_rate": 2.219446116549154e-05, "loss": 0.8412, "step": 227040 }, { "epoch": 3.9862006004318897, "grad_norm": 0.057301333050542545, "learning_rate": 2.2187716689349065e-05, "loss": 0.8388, "step": 227050 }, { "epoch": 3.986376165311891, "grad_norm": 0.04999942688027113, "learning_rate": 2.2180973211773313e-05, "loss": 0.8371, "step": 227060 }, { "epoch": 3.9865517301918922, "grad_norm": 0.041899314672286865, "learning_rate": 2.217423073284975e-05, "loss": 0.8347, "step": 227070 }, { "epoch": 3.9867272950718937, "grad_norm": 0.0678330494323735, "learning_rate": 2.2167489252663803e-05, "loss": 0.8382, "step": 227080 }, { "epoch": 3.986902859951895, "grad_norm": 0.05063874507699166, "learning_rate": 2.2160748771300857e-05, "loss": 0.8361, "step": 227090 }, { "epoch": 3.9870784248318967, "grad_norm": 0.056270212583534006, "learning_rate": 2.2154009288846336e-05, "loss": 0.8382, "step": 227100 }, { "epoch": 3.987253989711898, "grad_norm": 0.044495914589400026, "learning_rate": 2.2147270805385647e-05, "loss": 0.8479, "step": 227110 }, { "epoch": 3.987429554591899, "grad_norm": 0.06365743977912393, "learning_rate": 2.2140533321004134e-05, "loss": 0.8472, "step": 227120 }, { "epoch": 3.987605119471901, "grad_norm": 0.0587582558292326, "learning_rate": 2.213379683578717e-05, "loss": 0.8408, "step": 227130 }, { "epoch": 3.987780684351902, "grad_norm": 0.044106034482195404, "learning_rate": 2.2127061349820115e-05, "loss": 0.8305, "step": 227140 }, { "epoch": 3.9879562492319036, "grad_norm": 0.06985879722761215, "learning_rate": 2.2120326863188322e-05, "loss": 0.8357, "step": 227150 }, { "epoch": 3.988131814111905, "grad_norm": 0.04636394821963975, "learning_rate": 2.2113593375977094e-05, "loss": 0.8371, "step": 227160 }, { "epoch": 3.9883073789919066, "grad_norm": 0.05118090177600886, "learning_rate": 2.2106860888271753e-05, "loss": 0.8461, "step": 227170 }, { "epoch": 3.988482943871908, "grad_norm": 0.06809298143067441, "learning_rate": 2.2100129400157637e-05, "loss": 0.8351, "step": 227180 }, { "epoch": 3.988658508751909, "grad_norm": 0.0703820686601126, "learning_rate": 2.2093398911719965e-05, "loss": 0.8432, "step": 227190 }, { "epoch": 3.9888340736319106, "grad_norm": 0.04650253318459866, "learning_rate": 2.2086669423044058e-05, "loss": 0.8382, "step": 227200 }, { "epoch": 3.989009638511912, "grad_norm": 0.046454558814848945, "learning_rate": 2.2079940934215197e-05, "loss": 0.8422, "step": 227210 }, { "epoch": 3.9891852033919135, "grad_norm": 0.05696606857623079, "learning_rate": 2.2073213445318583e-05, "loss": 0.8355, "step": 227220 }, { "epoch": 3.989360768271915, "grad_norm": 0.05914474126213256, "learning_rate": 2.2066486956439485e-05, "loss": 0.841, "step": 227230 }, { "epoch": 3.989536333151916, "grad_norm": 0.06160147967250436, "learning_rate": 2.2059761467663114e-05, "loss": 0.8327, "step": 227240 }, { "epoch": 3.989711898031918, "grad_norm": 0.04738466682364862, "learning_rate": 2.205303697907473e-05, "loss": 0.8309, "step": 227250 }, { "epoch": 3.989887462911919, "grad_norm": 0.05865267959277675, "learning_rate": 2.204631349075947e-05, "loss": 0.8412, "step": 227260 }, { "epoch": 3.9900630277919205, "grad_norm": 0.04801719091928055, "learning_rate": 2.203959100280255e-05, "loss": 0.8431, "step": 227270 }, { "epoch": 3.990238592671922, "grad_norm": 0.05068138024911812, "learning_rate": 2.2032869515289172e-05, "loss": 0.8354, "step": 227280 }, { "epoch": 3.9904141575519234, "grad_norm": 0.05285638115743442, "learning_rate": 2.2026149028304443e-05, "loss": 0.8439, "step": 227290 }, { "epoch": 3.990589722431925, "grad_norm": 0.046168469860517494, "learning_rate": 2.2019429541933534e-05, "loss": 0.8397, "step": 227300 }, { "epoch": 3.990765287311926, "grad_norm": 0.049915204902594754, "learning_rate": 2.2012711056261602e-05, "loss": 0.836, "step": 227310 }, { "epoch": 3.9909408521919274, "grad_norm": 0.05225951905147399, "learning_rate": 2.2005993571373776e-05, "loss": 0.8353, "step": 227320 }, { "epoch": 3.991116417071929, "grad_norm": 0.050939780476679644, "learning_rate": 2.1999277087355126e-05, "loss": 0.8361, "step": 227330 }, { "epoch": 3.9912919819519304, "grad_norm": 0.05735679077132707, "learning_rate": 2.199256160429078e-05, "loss": 0.8414, "step": 227340 }, { "epoch": 3.991467546831932, "grad_norm": 0.05613003837982818, "learning_rate": 2.1985847122265846e-05, "loss": 0.8358, "step": 227350 }, { "epoch": 3.991643111711933, "grad_norm": 0.04670553408005558, "learning_rate": 2.1979133641365336e-05, "loss": 0.842, "step": 227360 }, { "epoch": 3.991818676591935, "grad_norm": 0.05468236048512556, "learning_rate": 2.197242116167436e-05, "loss": 0.8392, "step": 227370 }, { "epoch": 3.991994241471936, "grad_norm": 0.06431976723018325, "learning_rate": 2.1965709683277975e-05, "loss": 0.8393, "step": 227380 }, { "epoch": 3.9921698063519373, "grad_norm": 0.050171303876855616, "learning_rate": 2.1958999206261167e-05, "loss": 0.8346, "step": 227390 }, { "epoch": 3.9923453712319388, "grad_norm": 0.05342600629208988, "learning_rate": 2.1952289730708984e-05, "loss": 0.8403, "step": 227400 }, { "epoch": 3.9925209361119403, "grad_norm": 0.059078162549785256, "learning_rate": 2.1945581256706443e-05, "loss": 0.8362, "step": 227410 }, { "epoch": 3.9926965009919417, "grad_norm": 0.06994252526884759, "learning_rate": 2.193887378433856e-05, "loss": 0.8387, "step": 227420 }, { "epoch": 3.9928720658719428, "grad_norm": 0.05652007592176914, "learning_rate": 2.1932167313690286e-05, "loss": 0.8428, "step": 227430 }, { "epoch": 3.9930476307519442, "grad_norm": 0.053275299655313454, "learning_rate": 2.192546184484659e-05, "loss": 0.8412, "step": 227440 }, { "epoch": 3.9932231956319457, "grad_norm": 0.06141809818510227, "learning_rate": 2.1918757377892484e-05, "loss": 0.8386, "step": 227450 }, { "epoch": 3.993398760511947, "grad_norm": 0.04433531287015612, "learning_rate": 2.1912053912912844e-05, "loss": 0.848, "step": 227460 }, { "epoch": 3.9935743253919487, "grad_norm": 0.04751364168424958, "learning_rate": 2.190535144999264e-05, "loss": 0.8409, "step": 227470 }, { "epoch": 3.99374989027195, "grad_norm": 0.0750135911876498, "learning_rate": 2.1898649989216797e-05, "loss": 0.8424, "step": 227480 }, { "epoch": 3.9939254551519516, "grad_norm": 0.0500484918823922, "learning_rate": 2.1891949530670245e-05, "loss": 0.8373, "step": 227490 }, { "epoch": 3.9941010200319527, "grad_norm": 0.05247891712527428, "learning_rate": 2.188525007443782e-05, "loss": 0.8377, "step": 227500 }, { "epoch": 3.994276584911954, "grad_norm": 0.055226559510793756, "learning_rate": 2.1878551620604435e-05, "loss": 0.8426, "step": 227510 }, { "epoch": 3.9944521497919556, "grad_norm": 0.058196530672554495, "learning_rate": 2.1871854169255005e-05, "loss": 0.8398, "step": 227520 }, { "epoch": 3.994627714671957, "grad_norm": 0.04643825595916368, "learning_rate": 2.186515772047431e-05, "loss": 0.8476, "step": 227530 }, { "epoch": 3.9948032795519586, "grad_norm": 0.05002692232524761, "learning_rate": 2.185846227434724e-05, "loss": 0.8392, "step": 227540 }, { "epoch": 3.9949788444319596, "grad_norm": 0.059652802799637084, "learning_rate": 2.185176783095864e-05, "loss": 0.8388, "step": 227550 }, { "epoch": 3.995154409311961, "grad_norm": 0.04701987923405061, "learning_rate": 2.184507439039328e-05, "loss": 0.8427, "step": 227560 }, { "epoch": 3.9953299741919626, "grad_norm": 0.04754415936385225, "learning_rate": 2.183838195273601e-05, "loss": 0.8333, "step": 227570 }, { "epoch": 3.995505539071964, "grad_norm": 0.05763676965584185, "learning_rate": 2.1831690518071608e-05, "loss": 0.8354, "step": 227580 }, { "epoch": 3.9956811039519655, "grad_norm": 0.05689969035069112, "learning_rate": 2.1825000086484878e-05, "loss": 0.8404, "step": 227590 }, { "epoch": 3.995856668831967, "grad_norm": 0.05448919626840739, "learning_rate": 2.1818310658060552e-05, "loss": 0.8403, "step": 227600 }, { "epoch": 3.9960322337119685, "grad_norm": 0.04050177803148031, "learning_rate": 2.18116222328834e-05, "loss": 0.8446, "step": 227610 }, { "epoch": 3.9962077985919695, "grad_norm": 0.06555207188507584, "learning_rate": 2.18049348110382e-05, "loss": 0.8377, "step": 227620 }, { "epoch": 3.996383363471971, "grad_norm": 0.048013916196612404, "learning_rate": 2.1798248392609633e-05, "loss": 0.8482, "step": 227630 }, { "epoch": 3.9965589283519725, "grad_norm": 0.056765352553869754, "learning_rate": 2.179156297768243e-05, "loss": 0.8367, "step": 227640 }, { "epoch": 3.996734493231974, "grad_norm": 0.04808561064388354, "learning_rate": 2.1784878566341308e-05, "loss": 0.8398, "step": 227650 }, { "epoch": 3.9969100581119754, "grad_norm": 0.044499473038397755, "learning_rate": 2.1778195158670976e-05, "loss": 0.8313, "step": 227660 }, { "epoch": 3.9970856229919765, "grad_norm": 0.06031799051844324, "learning_rate": 2.1771512754756073e-05, "loss": 0.8396, "step": 227670 }, { "epoch": 3.997261187871978, "grad_norm": 0.08360284177449616, "learning_rate": 2.1764831354681294e-05, "loss": 0.8443, "step": 227680 }, { "epoch": 3.9974367527519794, "grad_norm": 0.05094004927161172, "learning_rate": 2.17581509585313e-05, "loss": 0.835, "step": 227690 }, { "epoch": 3.997612317631981, "grad_norm": 0.060297494807806176, "learning_rate": 2.175147156639071e-05, "loss": 0.8333, "step": 227700 }, { "epoch": 3.9977878825119824, "grad_norm": 0.05201927064065457, "learning_rate": 2.1744793178344156e-05, "loss": 0.836, "step": 227710 }, { "epoch": 3.997963447391984, "grad_norm": 0.04141621294754992, "learning_rate": 2.1738115794476276e-05, "loss": 0.8433, "step": 227720 }, { "epoch": 3.9981390122719853, "grad_norm": 0.08568285220978245, "learning_rate": 2.1731439414871652e-05, "loss": 0.8373, "step": 227730 }, { "epoch": 3.9983145771519863, "grad_norm": 0.049851209795767584, "learning_rate": 2.1724764039614864e-05, "loss": 0.8403, "step": 227740 }, { "epoch": 3.998490142031988, "grad_norm": 0.04993672049989771, "learning_rate": 2.1718089668790518e-05, "loss": 0.8394, "step": 227750 }, { "epoch": 3.9986657069119893, "grad_norm": 0.04795401766234504, "learning_rate": 2.1711416302483198e-05, "loss": 0.8408, "step": 227760 }, { "epoch": 3.998841271791991, "grad_norm": 0.05780452401840443, "learning_rate": 2.1704743940777404e-05, "loss": 0.8369, "step": 227770 }, { "epoch": 3.9990168366719923, "grad_norm": 0.05975472531800212, "learning_rate": 2.1698072583757697e-05, "loss": 0.8391, "step": 227780 }, { "epoch": 3.9991924015519933, "grad_norm": 0.053032980724063344, "learning_rate": 2.169140223150864e-05, "loss": 0.8282, "step": 227790 }, { "epoch": 3.999367966431995, "grad_norm": 0.054319920763426725, "learning_rate": 2.1684732884114688e-05, "loss": 0.8359, "step": 227800 }, { "epoch": 3.9995435313119962, "grad_norm": 0.049958317580873775, "learning_rate": 2.167806454166036e-05, "loss": 0.8335, "step": 227810 }, { "epoch": 3.9997190961919977, "grad_norm": 0.051806917227034066, "learning_rate": 2.1671397204230166e-05, "loss": 0.8361, "step": 227820 }, { "epoch": 3.999894661071999, "grad_norm": 0.046906767647533626, "learning_rate": 2.1664730871908602e-05, "loss": 0.8363, "step": 227830 }, { "epoch": 4.000070225952, "grad_norm": 0.051430352993023336, "learning_rate": 2.1658065544780068e-05, "loss": 0.8405, "step": 227840 }, { "epoch": 4.000245790832002, "grad_norm": 0.0576767638382457, "learning_rate": 2.1651401222929053e-05, "loss": 0.8392, "step": 227850 }, { "epoch": 4.000421355712003, "grad_norm": 0.055760159437616204, "learning_rate": 2.1644737906440024e-05, "loss": 0.834, "step": 227860 }, { "epoch": 4.000596920592005, "grad_norm": 0.04785713747947853, "learning_rate": 2.163807559539734e-05, "loss": 0.8366, "step": 227870 }, { "epoch": 4.000772485472006, "grad_norm": 0.05321898556249583, "learning_rate": 2.163141428988545e-05, "loss": 0.8324, "step": 227880 }, { "epoch": 4.000948050352007, "grad_norm": 0.052794632320891065, "learning_rate": 2.1624753989988776e-05, "loss": 0.8376, "step": 227890 }, { "epoch": 4.001123615232009, "grad_norm": 0.049229136786189835, "learning_rate": 2.1618094695791656e-05, "loss": 0.8365, "step": 227900 }, { "epoch": 4.00129918011201, "grad_norm": 0.054097480519453574, "learning_rate": 2.1611436407378493e-05, "loss": 0.8308, "step": 227910 }, { "epoch": 4.001474744992012, "grad_norm": 0.05695705889895112, "learning_rate": 2.1604779124833645e-05, "loss": 0.8379, "step": 227920 }, { "epoch": 4.001650309872013, "grad_norm": 0.03968928346594312, "learning_rate": 2.159812284824149e-05, "loss": 0.8352, "step": 227930 }, { "epoch": 4.001825874752015, "grad_norm": 0.058436739569295966, "learning_rate": 2.15914675776863e-05, "loss": 0.8422, "step": 227940 }, { "epoch": 4.002001439632016, "grad_norm": 0.05193673626605812, "learning_rate": 2.1584813313252438e-05, "loss": 0.8429, "step": 227950 }, { "epoch": 4.002177004512017, "grad_norm": 0.04539036144571544, "learning_rate": 2.157816005502425e-05, "loss": 0.8412, "step": 227960 }, { "epoch": 4.002352569392019, "grad_norm": 0.044633279325265514, "learning_rate": 2.1571507803085958e-05, "loss": 0.8347, "step": 227970 }, { "epoch": 4.00252813427202, "grad_norm": 0.06620418899545902, "learning_rate": 2.1564856557521877e-05, "loss": 0.8381, "step": 227980 }, { "epoch": 4.002703699152022, "grad_norm": 0.05065771949103179, "learning_rate": 2.1558206318416303e-05, "loss": 0.8404, "step": 227990 }, { "epoch": 4.002879264032023, "grad_norm": 0.044932618370699506, "learning_rate": 2.155155708585349e-05, "loss": 0.8416, "step": 228000 }, { "epoch": 4.003054828912024, "grad_norm": 0.05617844566079629, "learning_rate": 2.1544908859917663e-05, "loss": 0.8376, "step": 228010 }, { "epoch": 4.003230393792026, "grad_norm": 0.05420921302396657, "learning_rate": 2.1538261640693062e-05, "loss": 0.8325, "step": 228020 }, { "epoch": 4.003405958672027, "grad_norm": 0.053776786613921636, "learning_rate": 2.153161542826394e-05, "loss": 0.8299, "step": 228030 }, { "epoch": 4.003581523552029, "grad_norm": 0.05558587015981874, "learning_rate": 2.1524970222714455e-05, "loss": 0.8469, "step": 228040 }, { "epoch": 4.00375708843203, "grad_norm": 0.05382133791337714, "learning_rate": 2.1518326024128823e-05, "loss": 0.8357, "step": 228050 }, { "epoch": 4.003932653312032, "grad_norm": 0.04429267282932918, "learning_rate": 2.151168283259127e-05, "loss": 0.8331, "step": 228060 }, { "epoch": 4.004108218192033, "grad_norm": 0.05881223693866763, "learning_rate": 2.1505040648185896e-05, "loss": 0.8375, "step": 228070 }, { "epoch": 4.004283783072034, "grad_norm": 0.06304114584147148, "learning_rate": 2.1498399470996897e-05, "loss": 0.8387, "step": 228080 }, { "epoch": 4.004459347952036, "grad_norm": 0.11822737583314105, "learning_rate": 2.1491759301108413e-05, "loss": 0.8425, "step": 228090 }, { "epoch": 4.004634912832037, "grad_norm": 0.056233956238061086, "learning_rate": 2.14851201386046e-05, "loss": 0.8397, "step": 228100 }, { "epoch": 4.004810477712039, "grad_norm": 0.05198281883434514, "learning_rate": 2.1478481983569528e-05, "loss": 0.8381, "step": 228110 }, { "epoch": 4.00498604259204, "grad_norm": 0.050789157913089585, "learning_rate": 2.147184483608733e-05, "loss": 0.8403, "step": 228120 }, { "epoch": 4.005161607472041, "grad_norm": 0.047468325381398474, "learning_rate": 2.146520869624212e-05, "loss": 0.8414, "step": 228130 }, { "epoch": 4.005337172352043, "grad_norm": 0.0684045241733024, "learning_rate": 2.145857356411794e-05, "loss": 0.8445, "step": 228140 }, { "epoch": 4.005512737232044, "grad_norm": 0.04074191680106977, "learning_rate": 2.1451939439798873e-05, "loss": 0.8418, "step": 228150 }, { "epoch": 4.005688302112046, "grad_norm": 0.04274926323190166, "learning_rate": 2.1445306323368982e-05, "loss": 0.8359, "step": 228160 }, { "epoch": 4.005863866992047, "grad_norm": 0.054030238801262585, "learning_rate": 2.1438674214912324e-05, "loss": 0.8325, "step": 228170 }, { "epoch": 4.006039431872049, "grad_norm": 0.05288313714138526, "learning_rate": 2.1432043114512897e-05, "loss": 0.8312, "step": 228180 }, { "epoch": 4.00621499675205, "grad_norm": 0.04703872549835186, "learning_rate": 2.142541302225473e-05, "loss": 0.8343, "step": 228190 }, { "epoch": 4.006390561632051, "grad_norm": 0.042878462653139646, "learning_rate": 2.1418783938221857e-05, "loss": 0.8413, "step": 228200 }, { "epoch": 4.006566126512053, "grad_norm": 0.056131825493496795, "learning_rate": 2.1412155862498205e-05, "loss": 0.8366, "step": 228210 }, { "epoch": 4.006741691392054, "grad_norm": 0.07259725555726868, "learning_rate": 2.1405528795167802e-05, "loss": 0.8376, "step": 228220 }, { "epoch": 4.006917256272056, "grad_norm": 0.04912893715620019, "learning_rate": 2.1398902736314633e-05, "loss": 0.8394, "step": 228230 }, { "epoch": 4.007092821152057, "grad_norm": 0.05356669705135017, "learning_rate": 2.1392277686022584e-05, "loss": 0.842, "step": 228240 }, { "epoch": 4.007268386032058, "grad_norm": 0.07336891363176681, "learning_rate": 2.138565364437564e-05, "loss": 0.8425, "step": 228250 }, { "epoch": 4.00744395091206, "grad_norm": 0.06322305476642862, "learning_rate": 2.137903061145772e-05, "loss": 0.8381, "step": 228260 }, { "epoch": 4.007619515792061, "grad_norm": 0.04487674400774926, "learning_rate": 2.1372408587352775e-05, "loss": 0.8404, "step": 228270 }, { "epoch": 4.007795080672063, "grad_norm": 0.05593163457808573, "learning_rate": 2.1365787572144646e-05, "loss": 0.8411, "step": 228280 }, { "epoch": 4.007970645552064, "grad_norm": 0.0666449676803826, "learning_rate": 2.1359167565917237e-05, "loss": 0.8399, "step": 228290 }, { "epoch": 4.0081462104320655, "grad_norm": 0.050035597456899256, "learning_rate": 2.1352548568754483e-05, "loss": 0.8423, "step": 228300 }, { "epoch": 4.008321775312067, "grad_norm": 0.04431230228786136, "learning_rate": 2.134593058074016e-05, "loss": 0.8418, "step": 228310 }, { "epoch": 4.008497340192068, "grad_norm": 0.05154150147786471, "learning_rate": 2.1339313601958173e-05, "loss": 0.8444, "step": 228320 }, { "epoch": 4.0086729050720695, "grad_norm": 0.06364232171501465, "learning_rate": 2.1332697632492348e-05, "loss": 0.8359, "step": 228330 }, { "epoch": 4.008848469952071, "grad_norm": 0.049803764303391354, "learning_rate": 2.132608267242654e-05, "loss": 0.8458, "step": 228340 }, { "epoch": 4.0090240348320725, "grad_norm": 0.053056963271339755, "learning_rate": 2.1319468721844507e-05, "loss": 0.8417, "step": 228350 }, { "epoch": 4.0091995997120735, "grad_norm": 0.04697944963477992, "learning_rate": 2.1312855780830073e-05, "loss": 0.8418, "step": 228360 }, { "epoch": 4.009375164592075, "grad_norm": 0.05803210479643901, "learning_rate": 2.1306243849467053e-05, "loss": 0.8399, "step": 228370 }, { "epoch": 4.0095507294720765, "grad_norm": 0.05661515366311055, "learning_rate": 2.129963292783917e-05, "loss": 0.8294, "step": 228380 }, { "epoch": 4.0097262943520775, "grad_norm": 0.047304491595377, "learning_rate": 2.1293023016030214e-05, "loss": 0.8408, "step": 228390 }, { "epoch": 4.009901859232079, "grad_norm": 0.04292884025567959, "learning_rate": 2.1286414114123966e-05, "loss": 0.8361, "step": 228400 }, { "epoch": 4.0100774241120805, "grad_norm": 0.05472924179654737, "learning_rate": 2.12798062222041e-05, "loss": 0.8421, "step": 228410 }, { "epoch": 4.010252988992082, "grad_norm": 0.053983467934737496, "learning_rate": 2.1273199340354358e-05, "loss": 0.8386, "step": 228420 }, { "epoch": 4.010428553872083, "grad_norm": 0.046289193116781126, "learning_rate": 2.1266593468658466e-05, "loss": 0.8306, "step": 228430 }, { "epoch": 4.0106041187520844, "grad_norm": 0.05601504261186893, "learning_rate": 2.125998860720014e-05, "loss": 0.8398, "step": 228440 }, { "epoch": 4.010779683632086, "grad_norm": 0.054285550175944615, "learning_rate": 2.125338475606301e-05, "loss": 0.8425, "step": 228450 }, { "epoch": 4.010955248512087, "grad_norm": 0.07392760727142308, "learning_rate": 2.1246781915330782e-05, "loss": 0.8409, "step": 228460 }, { "epoch": 4.011130813392089, "grad_norm": 0.057907368671282954, "learning_rate": 2.1240180085087145e-05, "loss": 0.8395, "step": 228470 }, { "epoch": 4.01130637827209, "grad_norm": 0.05155061607121357, "learning_rate": 2.1233579265415682e-05, "loss": 0.8428, "step": 228480 }, { "epoch": 4.011481943152092, "grad_norm": 0.06077826723396317, "learning_rate": 2.1226979456400052e-05, "loss": 0.8364, "step": 228490 }, { "epoch": 4.011657508032093, "grad_norm": 0.07068341476732239, "learning_rate": 2.1220380658123888e-05, "loss": 0.8321, "step": 228500 }, { "epoch": 4.011833072912094, "grad_norm": 0.04661791102362994, "learning_rate": 2.1213782870670805e-05, "loss": 0.8415, "step": 228510 }, { "epoch": 4.012008637792096, "grad_norm": 0.0673298098082687, "learning_rate": 2.1207186094124372e-05, "loss": 0.8393, "step": 228520 }, { "epoch": 4.012184202672097, "grad_norm": 0.05359314444011101, "learning_rate": 2.1200590328568174e-05, "loss": 0.8348, "step": 228530 }, { "epoch": 4.012359767552099, "grad_norm": 0.056685701720829755, "learning_rate": 2.1193995574085816e-05, "loss": 0.8465, "step": 228540 }, { "epoch": 4.0125353324321, "grad_norm": 0.046000728534165376, "learning_rate": 2.1187401830760807e-05, "loss": 0.8382, "step": 228550 }, { "epoch": 4.012710897312101, "grad_norm": 0.04170575597062454, "learning_rate": 2.1180809098676716e-05, "loss": 0.8444, "step": 228560 }, { "epoch": 4.012886462192103, "grad_norm": 0.06877559088509268, "learning_rate": 2.117421737791709e-05, "loss": 0.8366, "step": 228570 }, { "epoch": 4.013062027072104, "grad_norm": 0.05102166288526095, "learning_rate": 2.1167626668565407e-05, "loss": 0.8398, "step": 228580 }, { "epoch": 4.013237591952106, "grad_norm": 0.05866404421887002, "learning_rate": 2.116103697070519e-05, "loss": 0.8362, "step": 228590 }, { "epoch": 4.013413156832107, "grad_norm": 0.051097366702653656, "learning_rate": 2.1154448284419935e-05, "loss": 0.8352, "step": 228600 }, { "epoch": 4.013588721712109, "grad_norm": 0.05332691387194018, "learning_rate": 2.1147860609793152e-05, "loss": 0.8349, "step": 228610 }, { "epoch": 4.01376428659211, "grad_norm": 0.0688926607579383, "learning_rate": 2.114127394690826e-05, "loss": 0.837, "step": 228620 }, { "epoch": 4.013939851472111, "grad_norm": 0.08593423603153874, "learning_rate": 2.1134688295848717e-05, "loss": 0.8369, "step": 228630 }, { "epoch": 4.014115416352113, "grad_norm": 0.043268393333409046, "learning_rate": 2.112810365669801e-05, "loss": 0.8347, "step": 228640 }, { "epoch": 4.014290981232114, "grad_norm": 0.07001389681159687, "learning_rate": 2.112152002953952e-05, "loss": 0.8417, "step": 228650 }, { "epoch": 4.014466546112116, "grad_norm": 0.046871567810866585, "learning_rate": 2.1114937414456668e-05, "loss": 0.8446, "step": 228660 }, { "epoch": 4.014642110992117, "grad_norm": 0.06935162948337731, "learning_rate": 2.1108355811532874e-05, "loss": 0.8313, "step": 228670 }, { "epoch": 4.014817675872118, "grad_norm": 0.046904488096247754, "learning_rate": 2.1101775220851556e-05, "loss": 0.8461, "step": 228680 }, { "epoch": 4.01499324075212, "grad_norm": 0.05080309763260092, "learning_rate": 2.1095195642496023e-05, "loss": 0.8333, "step": 228690 }, { "epoch": 4.015168805632121, "grad_norm": 0.048576151866846745, "learning_rate": 2.1088617076549674e-05, "loss": 0.8348, "step": 228700 }, { "epoch": 4.015344370512123, "grad_norm": 0.04706149783808083, "learning_rate": 2.1082039523095895e-05, "loss": 0.8441, "step": 228710 }, { "epoch": 4.015519935392124, "grad_norm": 0.046641459304434814, "learning_rate": 2.1075462982217963e-05, "loss": 0.8347, "step": 228720 }, { "epoch": 4.015695500272126, "grad_norm": 0.06544096926669513, "learning_rate": 2.1068887453999226e-05, "loss": 0.8439, "step": 228730 }, { "epoch": 4.015871065152127, "grad_norm": 0.06029203194313376, "learning_rate": 2.106231293852304e-05, "loss": 0.8348, "step": 228740 }, { "epoch": 4.016046630032128, "grad_norm": 0.06946006227116787, "learning_rate": 2.1055739435872634e-05, "loss": 0.8423, "step": 228750 }, { "epoch": 4.01622219491213, "grad_norm": 0.0556699317354452, "learning_rate": 2.1049166946131335e-05, "loss": 0.8397, "step": 228760 }, { "epoch": 4.016397759792131, "grad_norm": 0.05493212006811321, "learning_rate": 2.1042595469382417e-05, "loss": 0.8525, "step": 228770 }, { "epoch": 4.016573324672133, "grad_norm": 0.05001980939518627, "learning_rate": 2.103602500570916e-05, "loss": 0.8479, "step": 228780 }, { "epoch": 4.016748889552134, "grad_norm": 0.054303705657458144, "learning_rate": 2.1029455555194765e-05, "loss": 0.8393, "step": 228790 }, { "epoch": 4.016924454432135, "grad_norm": 0.055052570934754526, "learning_rate": 2.1022887117922486e-05, "loss": 0.8372, "step": 228800 }, { "epoch": 4.017100019312137, "grad_norm": 0.05373870906805272, "learning_rate": 2.1016319693975597e-05, "loss": 0.8377, "step": 228810 }, { "epoch": 4.017275584192138, "grad_norm": 0.05328153057326284, "learning_rate": 2.100975328343723e-05, "loss": 0.8378, "step": 228820 }, { "epoch": 4.01745114907214, "grad_norm": 0.052321304809117224, "learning_rate": 2.1003187886390625e-05, "loss": 0.8384, "step": 228830 }, { "epoch": 4.017626713952141, "grad_norm": 0.061335780381343505, "learning_rate": 2.0996623502918946e-05, "loss": 0.8362, "step": 228840 }, { "epoch": 4.017802278832143, "grad_norm": 0.04114487331837173, "learning_rate": 2.0990060133105422e-05, "loss": 0.8366, "step": 228850 }, { "epoch": 4.017977843712144, "grad_norm": 0.04617196205787872, "learning_rate": 2.0983497777033143e-05, "loss": 0.8319, "step": 228860 }, { "epoch": 4.018153408592145, "grad_norm": 0.04575493638678533, "learning_rate": 2.0976936434785275e-05, "loss": 0.8416, "step": 228870 }, { "epoch": 4.018328973472147, "grad_norm": 0.07639492808728432, "learning_rate": 2.097037610644499e-05, "loss": 0.8444, "step": 228880 }, { "epoch": 4.018504538352148, "grad_norm": 0.05285747748745995, "learning_rate": 2.0963816792095344e-05, "loss": 0.8375, "step": 228890 }, { "epoch": 4.01868010323215, "grad_norm": 0.052585179962595374, "learning_rate": 2.0957258491819484e-05, "loss": 0.8395, "step": 228900 }, { "epoch": 4.018855668112151, "grad_norm": 0.04465796165355224, "learning_rate": 2.0950701205700523e-05, "loss": 0.8452, "step": 228910 }, { "epoch": 4.019031232992152, "grad_norm": 0.044262827250961076, "learning_rate": 2.0944144933821504e-05, "loss": 0.8422, "step": 228920 }, { "epoch": 4.019206797872154, "grad_norm": 0.04681755782526959, "learning_rate": 2.0937589676265502e-05, "loss": 0.8395, "step": 228930 }, { "epoch": 4.019382362752155, "grad_norm": 0.05565973803937903, "learning_rate": 2.0931035433115582e-05, "loss": 0.8423, "step": 228940 }, { "epoch": 4.019557927632157, "grad_norm": 0.0548074844126927, "learning_rate": 2.0924482204454814e-05, "loss": 0.8495, "step": 228950 }, { "epoch": 4.019733492512158, "grad_norm": 0.048430253059878255, "learning_rate": 2.091792999036617e-05, "loss": 0.8398, "step": 228960 }, { "epoch": 4.01990905739216, "grad_norm": 0.05777988932159632, "learning_rate": 2.0911378790932716e-05, "loss": 0.8386, "step": 228970 }, { "epoch": 4.020084622272161, "grad_norm": 0.059178659428584024, "learning_rate": 2.090482860623745e-05, "loss": 0.8356, "step": 228980 }, { "epoch": 4.020260187152162, "grad_norm": 0.05119836354099496, "learning_rate": 2.0898279436363346e-05, "loss": 0.8403, "step": 228990 }, { "epoch": 4.020435752032164, "grad_norm": 0.04063604745279614, "learning_rate": 2.0891731281393385e-05, "loss": 0.8395, "step": 229000 }, { "epoch": 4.020611316912165, "grad_norm": 0.04188981306875498, "learning_rate": 2.0885184141410542e-05, "loss": 0.8414, "step": 229010 }, { "epoch": 4.020786881792167, "grad_norm": 0.0520519229039192, "learning_rate": 2.08786380164978e-05, "loss": 0.8319, "step": 229020 }, { "epoch": 4.020962446672168, "grad_norm": 0.05322617932147179, "learning_rate": 2.0872092906738038e-05, "loss": 0.8398, "step": 229030 }, { "epoch": 4.0211380115521695, "grad_norm": 0.05972623822650177, "learning_rate": 2.0865548812214215e-05, "loss": 0.8428, "step": 229040 }, { "epoch": 4.021313576432171, "grad_norm": 0.04614561226830447, "learning_rate": 2.0859005733009284e-05, "loss": 0.8392, "step": 229050 }, { "epoch": 4.021489141312172, "grad_norm": 0.054288280785504094, "learning_rate": 2.0852463669206067e-05, "loss": 0.8341, "step": 229060 }, { "epoch": 4.0216647061921735, "grad_norm": 0.054929403113677465, "learning_rate": 2.0845922620887503e-05, "loss": 0.8349, "step": 229070 }, { "epoch": 4.021840271072175, "grad_norm": 0.050251921852232984, "learning_rate": 2.0839382588136483e-05, "loss": 0.8437, "step": 229080 }, { "epoch": 4.0220158359521765, "grad_norm": 0.05684489863347725, "learning_rate": 2.0832843571035826e-05, "loss": 0.8379, "step": 229090 }, { "epoch": 4.0221914008321775, "grad_norm": 0.05980361390415074, "learning_rate": 2.08263055696684e-05, "loss": 0.8314, "step": 229100 }, { "epoch": 4.0223669657121786, "grad_norm": 0.05434443604606556, "learning_rate": 2.081976858411706e-05, "loss": 0.8314, "step": 229110 }, { "epoch": 4.0225425305921805, "grad_norm": 0.05395855289602521, "learning_rate": 2.0813232614464632e-05, "loss": 0.839, "step": 229120 }, { "epoch": 4.0227180954721815, "grad_norm": 0.05326344455832936, "learning_rate": 2.08066976607939e-05, "loss": 0.8402, "step": 229130 }, { "epoch": 4.022893660352183, "grad_norm": 0.05462504240120527, "learning_rate": 2.0800163723187667e-05, "loss": 0.8397, "step": 229140 }, { "epoch": 4.0230692252321845, "grad_norm": 0.05109738322699131, "learning_rate": 2.0793630801728766e-05, "loss": 0.8392, "step": 229150 }, { "epoch": 4.023244790112186, "grad_norm": 0.0781659077062586, "learning_rate": 2.0787098896499903e-05, "loss": 0.8342, "step": 229160 }, { "epoch": 4.023420354992187, "grad_norm": 0.04689314189005139, "learning_rate": 2.078056800758386e-05, "loss": 0.8451, "step": 229170 }, { "epoch": 4.0235959198721885, "grad_norm": 0.075037884509385, "learning_rate": 2.077403813506341e-05, "loss": 0.8305, "step": 229180 }, { "epoch": 4.02377148475219, "grad_norm": 0.04534386192080716, "learning_rate": 2.0767509279021295e-05, "loss": 0.8371, "step": 229190 }, { "epoch": 4.023947049632191, "grad_norm": 0.052122542147668725, "learning_rate": 2.076098143954018e-05, "loss": 0.8409, "step": 229200 }, { "epoch": 4.024122614512193, "grad_norm": 0.04731991775615175, "learning_rate": 2.0754454616702815e-05, "loss": 0.8376, "step": 229210 }, { "epoch": 4.024298179392194, "grad_norm": 0.07552432112392768, "learning_rate": 2.0747928810591908e-05, "loss": 0.8397, "step": 229220 }, { "epoch": 4.024473744272195, "grad_norm": 0.06688654622468823, "learning_rate": 2.07414040212901e-05, "loss": 0.8466, "step": 229230 }, { "epoch": 4.024649309152197, "grad_norm": 0.0595542574909324, "learning_rate": 2.0734880248880088e-05, "loss": 0.8358, "step": 229240 }, { "epoch": 4.024824874032198, "grad_norm": 0.05005220486992301, "learning_rate": 2.0728357493444555e-05, "loss": 0.8381, "step": 229250 }, { "epoch": 4.0250004389122, "grad_norm": 0.04514522852120793, "learning_rate": 2.072183575506608e-05, "loss": 0.8379, "step": 229260 }, { "epoch": 4.025176003792201, "grad_norm": 0.0654768182189137, "learning_rate": 2.071531503382733e-05, "loss": 0.8365, "step": 229270 }, { "epoch": 4.025351568672203, "grad_norm": 0.05395881682476808, "learning_rate": 2.070879532981094e-05, "loss": 0.8362, "step": 229280 }, { "epoch": 4.025527133552204, "grad_norm": 0.041633417103118665, "learning_rate": 2.0702276643099517e-05, "loss": 0.8513, "step": 229290 }, { "epoch": 4.025702698432205, "grad_norm": 0.05435898619722817, "learning_rate": 2.069575897377561e-05, "loss": 0.8423, "step": 229300 }, { "epoch": 4.025878263312207, "grad_norm": 0.04517912004343175, "learning_rate": 2.068924232192183e-05, "loss": 0.8424, "step": 229310 }, { "epoch": 4.026053828192208, "grad_norm": 0.04923447716152104, "learning_rate": 2.0682726687620766e-05, "loss": 0.8492, "step": 229320 }, { "epoch": 4.02622939307221, "grad_norm": 0.05167620374334098, "learning_rate": 2.067621207095493e-05, "loss": 0.8358, "step": 229330 }, { "epoch": 4.026404957952211, "grad_norm": 0.045581397119728424, "learning_rate": 2.0669698472006875e-05, "loss": 0.8435, "step": 229340 }, { "epoch": 4.026580522832212, "grad_norm": 0.05306434862809325, "learning_rate": 2.0663185890859135e-05, "loss": 0.8328, "step": 229350 }, { "epoch": 4.026756087712214, "grad_norm": 0.04940556528165706, "learning_rate": 2.065667432759425e-05, "loss": 0.8377, "step": 229360 }, { "epoch": 4.026931652592215, "grad_norm": 0.05481786993906389, "learning_rate": 2.0650163782294673e-05, "loss": 0.8396, "step": 229370 }, { "epoch": 4.027107217472217, "grad_norm": 0.04559542492497215, "learning_rate": 2.0643654255042926e-05, "loss": 0.8397, "step": 229380 }, { "epoch": 4.027282782352218, "grad_norm": 0.05170557756554042, "learning_rate": 2.0637145745921502e-05, "loss": 0.8307, "step": 229390 }, { "epoch": 4.02745834723222, "grad_norm": 0.052129619640951495, "learning_rate": 2.063063825501281e-05, "loss": 0.8402, "step": 229400 }, { "epoch": 4.027633912112221, "grad_norm": 0.05914574118171682, "learning_rate": 2.062413178239935e-05, "loss": 0.8431, "step": 229410 }, { "epoch": 4.027809476992222, "grad_norm": 0.05003056827678073, "learning_rate": 2.0617626328163554e-05, "loss": 0.8299, "step": 229420 }, { "epoch": 4.027985041872224, "grad_norm": 0.061301070887270846, "learning_rate": 2.0611121892387815e-05, "loss": 0.8397, "step": 229430 }, { "epoch": 4.028160606752225, "grad_norm": 0.04976110724961048, "learning_rate": 2.060461847515458e-05, "loss": 0.836, "step": 229440 }, { "epoch": 4.028336171632227, "grad_norm": 0.062433010457655244, "learning_rate": 2.0598116076546224e-05, "loss": 0.8385, "step": 229450 }, { "epoch": 4.028511736512228, "grad_norm": 0.05803487456801171, "learning_rate": 2.059161469664518e-05, "loss": 0.8362, "step": 229460 }, { "epoch": 4.028687301392229, "grad_norm": 0.05662005126322928, "learning_rate": 2.0585114335533757e-05, "loss": 0.8328, "step": 229470 }, { "epoch": 4.028862866272231, "grad_norm": 0.04094585809049682, "learning_rate": 2.057861499329435e-05, "loss": 0.839, "step": 229480 }, { "epoch": 4.029038431152232, "grad_norm": 0.04869585088897707, "learning_rate": 2.0572116670009325e-05, "loss": 0.8371, "step": 229490 }, { "epoch": 4.029213996032234, "grad_norm": 0.04839143705080867, "learning_rate": 2.056561936576097e-05, "loss": 0.8431, "step": 229500 }, { "epoch": 4.029389560912235, "grad_norm": 0.06038446338335348, "learning_rate": 2.055912308063164e-05, "loss": 0.8389, "step": 229510 }, { "epoch": 4.029565125792237, "grad_norm": 0.05759718577929173, "learning_rate": 2.0552627814703632e-05, "loss": 0.8373, "step": 229520 }, { "epoch": 4.029740690672238, "grad_norm": 0.05252545832095007, "learning_rate": 2.054613356805927e-05, "loss": 0.832, "step": 229530 }, { "epoch": 4.029916255552239, "grad_norm": 0.06055301627537975, "learning_rate": 2.05396403407808e-05, "loss": 0.8376, "step": 229540 }, { "epoch": 4.030091820432241, "grad_norm": 0.053585757271932835, "learning_rate": 2.0533148132950494e-05, "loss": 0.8347, "step": 229550 }, { "epoch": 4.030267385312242, "grad_norm": 0.04672589726295374, "learning_rate": 2.0526656944650656e-05, "loss": 0.8428, "step": 229560 }, { "epoch": 4.030442950192244, "grad_norm": 0.043816027513629896, "learning_rate": 2.052016677596347e-05, "loss": 0.8468, "step": 229570 }, { "epoch": 4.030618515072245, "grad_norm": 0.053921235018129864, "learning_rate": 2.05136776269712e-05, "loss": 0.8395, "step": 229580 }, { "epoch": 4.030794079952246, "grad_norm": 0.04754632932991002, "learning_rate": 2.050718949775607e-05, "loss": 0.8338, "step": 229590 }, { "epoch": 4.030969644832248, "grad_norm": 0.05915963759175247, "learning_rate": 2.050070238840026e-05, "loss": 0.8441, "step": 229600 }, { "epoch": 4.031145209712249, "grad_norm": 0.04625418285806889, "learning_rate": 2.0494216298985976e-05, "loss": 0.8338, "step": 229610 }, { "epoch": 4.031320774592251, "grad_norm": 0.06346793586062899, "learning_rate": 2.048773122959541e-05, "loss": 0.8425, "step": 229620 }, { "epoch": 4.031496339472252, "grad_norm": 0.04412300622396203, "learning_rate": 2.0481247180310738e-05, "loss": 0.8387, "step": 229630 }, { "epoch": 4.031671904352254, "grad_norm": 0.05812929567732477, "learning_rate": 2.047476415121406e-05, "loss": 0.8407, "step": 229640 }, { "epoch": 4.031847469232255, "grad_norm": 0.0601840727737345, "learning_rate": 2.046828214238757e-05, "loss": 0.8339, "step": 229650 }, { "epoch": 4.032023034112256, "grad_norm": 0.04279342620119814, "learning_rate": 2.0461801153913392e-05, "loss": 0.8394, "step": 229660 }, { "epoch": 4.032198598992258, "grad_norm": 0.053087433378207746, "learning_rate": 2.045532118587361e-05, "loss": 0.8354, "step": 229670 }, { "epoch": 4.032374163872259, "grad_norm": 0.05272274495130778, "learning_rate": 2.044884223835034e-05, "loss": 0.8426, "step": 229680 }, { "epoch": 4.032549728752261, "grad_norm": 0.0477275245628393, "learning_rate": 2.0442364311425677e-05, "loss": 0.8357, "step": 229690 }, { "epoch": 4.032725293632262, "grad_norm": 0.0695955560168129, "learning_rate": 2.043588740518172e-05, "loss": 0.8347, "step": 229700 }, { "epoch": 4.032900858512264, "grad_norm": 0.052668811750667455, "learning_rate": 2.0429411519700488e-05, "loss": 0.8422, "step": 229710 }, { "epoch": 4.033076423392265, "grad_norm": 0.04830158515900441, "learning_rate": 2.0422936655064055e-05, "loss": 0.8402, "step": 229720 }, { "epoch": 4.033251988272266, "grad_norm": 0.0514384340550545, "learning_rate": 2.041646281135449e-05, "loss": 0.837, "step": 229730 }, { "epoch": 4.033427553152268, "grad_norm": 0.04982723282642766, "learning_rate": 2.040998998865374e-05, "loss": 0.8386, "step": 229740 }, { "epoch": 4.033603118032269, "grad_norm": 0.05509410079303451, "learning_rate": 2.0403518187043883e-05, "loss": 0.8357, "step": 229750 }, { "epoch": 4.033778682912271, "grad_norm": 0.0768989395391401, "learning_rate": 2.0397047406606908e-05, "loss": 0.8407, "step": 229760 }, { "epoch": 4.033954247792272, "grad_norm": 0.06838177819649217, "learning_rate": 2.0390577647424773e-05, "loss": 0.8405, "step": 229770 }, { "epoch": 4.034129812672273, "grad_norm": 0.07068902917764515, "learning_rate": 2.038410890957946e-05, "loss": 0.8422, "step": 229780 }, { "epoch": 4.034305377552275, "grad_norm": 0.048590413878441624, "learning_rate": 2.037764119315294e-05, "loss": 0.8431, "step": 229790 }, { "epoch": 4.034480942432276, "grad_norm": 0.043272060083146865, "learning_rate": 2.0371174498227176e-05, "loss": 0.8349, "step": 229800 }, { "epoch": 4.0346565073122775, "grad_norm": 0.057516357336976714, "learning_rate": 2.0364708824884066e-05, "loss": 0.8377, "step": 229810 }, { "epoch": 4.034832072192279, "grad_norm": 0.04684869946905158, "learning_rate": 2.0358244173205557e-05, "loss": 0.8364, "step": 229820 }, { "epoch": 4.0350076370722805, "grad_norm": 0.053271513842218045, "learning_rate": 2.0351780543273562e-05, "loss": 0.8466, "step": 229830 }, { "epoch": 4.0351832019522815, "grad_norm": 0.07199149123656212, "learning_rate": 2.0345317935169944e-05, "loss": 0.8418, "step": 229840 }, { "epoch": 4.035358766832283, "grad_norm": 0.04472567682236191, "learning_rate": 2.0338856348976597e-05, "loss": 0.8303, "step": 229850 }, { "epoch": 4.0355343317122845, "grad_norm": 0.050332364208328526, "learning_rate": 2.03323957847754e-05, "loss": 0.8294, "step": 229860 }, { "epoch": 4.0357098965922855, "grad_norm": 0.06642933934758856, "learning_rate": 2.0325936242648247e-05, "loss": 0.8394, "step": 229870 }, { "epoch": 4.035885461472287, "grad_norm": 0.05359448357430296, "learning_rate": 2.031947772267691e-05, "loss": 0.8362, "step": 229880 }, { "epoch": 4.0360610263522885, "grad_norm": 0.0505188978734351, "learning_rate": 2.031302022494325e-05, "loss": 0.8403, "step": 229890 }, { "epoch": 4.0362365912322895, "grad_norm": 0.04318628101969457, "learning_rate": 2.0306563749529113e-05, "loss": 0.8348, "step": 229900 }, { "epoch": 4.036412156112291, "grad_norm": 0.055413898845277906, "learning_rate": 2.0300108296516247e-05, "loss": 0.8411, "step": 229910 }, { "epoch": 4.0365877209922925, "grad_norm": 0.05096788522882167, "learning_rate": 2.0293653865986485e-05, "loss": 0.8327, "step": 229920 }, { "epoch": 4.036763285872294, "grad_norm": 0.06667513818980107, "learning_rate": 2.028720045802162e-05, "loss": 0.8406, "step": 229930 }, { "epoch": 4.036938850752295, "grad_norm": 0.05396540024801403, "learning_rate": 2.0280748072703364e-05, "loss": 0.8375, "step": 229940 }, { "epoch": 4.037114415632297, "grad_norm": 0.06333452103054876, "learning_rate": 2.027429671011351e-05, "loss": 0.8469, "step": 229950 }, { "epoch": 4.037289980512298, "grad_norm": 0.04528334837384292, "learning_rate": 2.026784637033378e-05, "loss": 0.8361, "step": 229960 }, { "epoch": 4.037465545392299, "grad_norm": 0.04951291152002022, "learning_rate": 2.0261397053445926e-05, "loss": 0.8338, "step": 229970 }, { "epoch": 4.037641110272301, "grad_norm": 0.0618531626243401, "learning_rate": 2.0254948759531634e-05, "loss": 0.839, "step": 229980 }, { "epoch": 4.037816675152302, "grad_norm": 0.05484235075869022, "learning_rate": 2.0248501488672606e-05, "loss": 0.8355, "step": 229990 }, { "epoch": 4.037992240032304, "grad_norm": 0.06750076993132127, "learning_rate": 2.0242055240950567e-05, "loss": 0.8332, "step": 230000 }, { "epoch": 4.038167804912305, "grad_norm": 0.049873657401548185, "learning_rate": 2.023561001644714e-05, "loss": 0.8509, "step": 230010 }, { "epoch": 4.038343369792306, "grad_norm": 0.05452785793869996, "learning_rate": 2.022916581524402e-05, "loss": 0.8391, "step": 230020 }, { "epoch": 4.038518934672308, "grad_norm": 0.057166574503669285, "learning_rate": 2.0222722637422846e-05, "loss": 0.8415, "step": 230030 }, { "epoch": 4.038694499552309, "grad_norm": 0.046948715657761854, "learning_rate": 2.0216280483065275e-05, "loss": 0.8389, "step": 230040 }, { "epoch": 4.038870064432311, "grad_norm": 0.04258036543896514, "learning_rate": 2.020983935225289e-05, "loss": 0.8374, "step": 230050 }, { "epoch": 4.039045629312312, "grad_norm": 0.05209110647792407, "learning_rate": 2.0203399245067324e-05, "loss": 0.8373, "step": 230060 }, { "epoch": 4.039221194192314, "grad_norm": 0.048227514964742646, "learning_rate": 2.0196960161590197e-05, "loss": 0.8443, "step": 230070 }, { "epoch": 4.039396759072315, "grad_norm": 0.05012820101499754, "learning_rate": 2.019052210190306e-05, "loss": 0.8364, "step": 230080 }, { "epoch": 4.039572323952316, "grad_norm": 0.04181778123755861, "learning_rate": 2.0184085066087477e-05, "loss": 0.8431, "step": 230090 }, { "epoch": 4.039747888832318, "grad_norm": 0.05299582869710773, "learning_rate": 2.017764905422506e-05, "loss": 0.8466, "step": 230100 }, { "epoch": 4.039923453712319, "grad_norm": 0.05312111860646189, "learning_rate": 2.0171214066397286e-05, "loss": 0.8323, "step": 230110 }, { "epoch": 4.040099018592321, "grad_norm": 0.052189424024329197, "learning_rate": 2.0164780102685732e-05, "loss": 0.8305, "step": 230120 }, { "epoch": 4.040274583472322, "grad_norm": 0.048171353266782006, "learning_rate": 2.0158347163171897e-05, "loss": 0.8376, "step": 230130 }, { "epoch": 4.040450148352323, "grad_norm": 0.05319065647007432, "learning_rate": 2.0151915247937325e-05, "loss": 0.8378, "step": 230140 }, { "epoch": 4.040625713232325, "grad_norm": 0.06024830298609819, "learning_rate": 2.014548435706345e-05, "loss": 0.8444, "step": 230150 }, { "epoch": 4.040801278112326, "grad_norm": 0.0514173042108325, "learning_rate": 2.0139054490631786e-05, "loss": 0.8457, "step": 230160 }, { "epoch": 4.040976842992328, "grad_norm": 0.05107704039166751, "learning_rate": 2.0132625648723828e-05, "loss": 0.8362, "step": 230170 }, { "epoch": 4.041152407872329, "grad_norm": 0.05095917109593666, "learning_rate": 2.0126197831420977e-05, "loss": 0.8412, "step": 230180 }, { "epoch": 4.041327972752331, "grad_norm": 0.06418623251053185, "learning_rate": 2.0119771038804694e-05, "loss": 0.8371, "step": 230190 }, { "epoch": 4.041503537632332, "grad_norm": 0.050206475350025224, "learning_rate": 2.011334527095642e-05, "loss": 0.8406, "step": 230200 }, { "epoch": 4.041679102512333, "grad_norm": 0.054709607289363166, "learning_rate": 2.0106920527957588e-05, "loss": 0.8318, "step": 230210 }, { "epoch": 4.041854667392335, "grad_norm": 0.05724008255540709, "learning_rate": 2.0100496809889547e-05, "loss": 0.8429, "step": 230220 }, { "epoch": 4.042030232272336, "grad_norm": 0.055719897566878195, "learning_rate": 2.0094074116833714e-05, "loss": 0.8378, "step": 230230 }, { "epoch": 4.042205797152338, "grad_norm": 0.06624272850085407, "learning_rate": 2.0087652448871486e-05, "loss": 0.8441, "step": 230240 }, { "epoch": 4.042381362032339, "grad_norm": 0.05206164759595803, "learning_rate": 2.0081231806084195e-05, "loss": 0.8411, "step": 230250 }, { "epoch": 4.04255692691234, "grad_norm": 0.0528436280273677, "learning_rate": 2.0074812188553196e-05, "loss": 0.8329, "step": 230260 }, { "epoch": 4.042732491792342, "grad_norm": 0.04788627971280511, "learning_rate": 2.0068393596359872e-05, "loss": 0.8431, "step": 230270 }, { "epoch": 4.042908056672343, "grad_norm": 0.04940408867778806, "learning_rate": 2.0061976029585478e-05, "loss": 0.8388, "step": 230280 }, { "epoch": 4.043083621552345, "grad_norm": 0.054902354716712884, "learning_rate": 2.005555948831136e-05, "loss": 0.8368, "step": 230290 }, { "epoch": 4.043259186432346, "grad_norm": 0.06202236078802633, "learning_rate": 2.004914397261882e-05, "loss": 0.8421, "step": 230300 }, { "epoch": 4.043434751312348, "grad_norm": 0.045171609344725555, "learning_rate": 2.0042729482589164e-05, "loss": 0.8368, "step": 230310 }, { "epoch": 4.043610316192349, "grad_norm": 0.05482344779950296, "learning_rate": 2.003631601830361e-05, "loss": 0.8388, "step": 230320 }, { "epoch": 4.04378588107235, "grad_norm": 0.06530297545001046, "learning_rate": 2.0029903579843456e-05, "loss": 0.832, "step": 230330 }, { "epoch": 4.043961445952352, "grad_norm": 0.05697092816764783, "learning_rate": 2.0023492167289963e-05, "loss": 0.8281, "step": 230340 }, { "epoch": 4.044137010832353, "grad_norm": 0.0765800560707525, "learning_rate": 2.001708178072432e-05, "loss": 0.8413, "step": 230350 }, { "epoch": 4.044312575712355, "grad_norm": 0.045872073450713785, "learning_rate": 2.001067242022777e-05, "loss": 0.8385, "step": 230360 }, { "epoch": 4.044488140592356, "grad_norm": 0.047939326991458274, "learning_rate": 2.000426408588153e-05, "loss": 0.8386, "step": 230370 }, { "epoch": 4.044663705472357, "grad_norm": 0.049990038238962906, "learning_rate": 1.9997856777766798e-05, "loss": 0.8311, "step": 230380 }, { "epoch": 4.044839270352359, "grad_norm": 0.049917642562413024, "learning_rate": 1.9991450495964726e-05, "loss": 0.8387, "step": 230390 }, { "epoch": 4.04501483523236, "grad_norm": 0.04712547378928479, "learning_rate": 1.9985045240556506e-05, "loss": 0.8335, "step": 230400 }, { "epoch": 4.045190400112362, "grad_norm": 0.05082934902077995, "learning_rate": 1.9978641011623302e-05, "loss": 0.8478, "step": 230410 }, { "epoch": 4.045365964992363, "grad_norm": 0.04641030214395614, "learning_rate": 1.9972237809246228e-05, "loss": 0.8405, "step": 230420 }, { "epoch": 4.045541529872365, "grad_norm": 0.054738622603056536, "learning_rate": 1.9965835633506438e-05, "loss": 0.8414, "step": 230430 }, { "epoch": 4.045717094752366, "grad_norm": 0.07488425229863281, "learning_rate": 1.9959434484485055e-05, "loss": 0.8487, "step": 230440 }, { "epoch": 4.045892659632367, "grad_norm": 0.07982584572235056, "learning_rate": 1.9953034362263147e-05, "loss": 0.8386, "step": 230450 }, { "epoch": 4.046068224512369, "grad_norm": 0.04524325877420843, "learning_rate": 1.994663526692182e-05, "loss": 0.8297, "step": 230460 }, { "epoch": 4.04624378939237, "grad_norm": 0.04216529853922622, "learning_rate": 1.9940237198542165e-05, "loss": 0.8398, "step": 230470 }, { "epoch": 4.046419354272372, "grad_norm": 0.04192049900651561, "learning_rate": 1.9933840157205254e-05, "loss": 0.8377, "step": 230480 }, { "epoch": 4.046594919152373, "grad_norm": 0.048124354817627724, "learning_rate": 1.9927444142992103e-05, "loss": 0.8331, "step": 230490 }, { "epoch": 4.046770484032375, "grad_norm": 0.051050015380614856, "learning_rate": 1.992104915598378e-05, "loss": 0.8371, "step": 230500 }, { "epoch": 4.046946048912376, "grad_norm": 0.053698282029411806, "learning_rate": 1.9914655196261326e-05, "loss": 0.8451, "step": 230510 }, { "epoch": 4.047121613792377, "grad_norm": 0.05529156612040191, "learning_rate": 1.99082622639057e-05, "loss": 0.8345, "step": 230520 }, { "epoch": 4.047297178672379, "grad_norm": 0.06273283350336162, "learning_rate": 1.990187035899794e-05, "loss": 0.8454, "step": 230530 }, { "epoch": 4.04747274355238, "grad_norm": 0.055558940827344735, "learning_rate": 1.9895479481619026e-05, "loss": 0.8375, "step": 230540 }, { "epoch": 4.0476483084323815, "grad_norm": 0.047528202833406606, "learning_rate": 1.9889089631849942e-05, "loss": 0.831, "step": 230550 }, { "epoch": 4.047823873312383, "grad_norm": 0.14866635725108476, "learning_rate": 1.9882700809771628e-05, "loss": 0.8404, "step": 230560 }, { "epoch": 4.047999438192384, "grad_norm": 0.04182305810595356, "learning_rate": 1.987631301546503e-05, "loss": 0.8389, "step": 230570 }, { "epoch": 4.0481750030723855, "grad_norm": 0.061612582197164166, "learning_rate": 1.9869926249011117e-05, "loss": 0.8394, "step": 230580 }, { "epoch": 4.048350567952387, "grad_norm": 0.04732722722980612, "learning_rate": 1.986354051049077e-05, "loss": 0.8399, "step": 230590 }, { "epoch": 4.0485261328323885, "grad_norm": 0.04893108330147805, "learning_rate": 1.9857155799984913e-05, "loss": 0.8417, "step": 230600 }, { "epoch": 4.0487016977123895, "grad_norm": 0.051792623485462, "learning_rate": 1.985077211757447e-05, "loss": 0.8418, "step": 230610 }, { "epoch": 4.0488772625923914, "grad_norm": 0.04715118133839669, "learning_rate": 1.9844389463340266e-05, "loss": 0.842, "step": 230620 }, { "epoch": 4.0490528274723925, "grad_norm": 0.060536656146474556, "learning_rate": 1.9838007837363197e-05, "loss": 0.841, "step": 230630 }, { "epoch": 4.0492283923523935, "grad_norm": 0.04782490086695707, "learning_rate": 1.9831627239724136e-05, "loss": 0.8443, "step": 230640 }, { "epoch": 4.049403957232395, "grad_norm": 0.04754082076086008, "learning_rate": 1.9825247670503925e-05, "loss": 0.8382, "step": 230650 }, { "epoch": 4.0495795221123965, "grad_norm": 0.04948610023088782, "learning_rate": 1.981886912978337e-05, "loss": 0.8377, "step": 230660 }, { "epoch": 4.049755086992398, "grad_norm": 0.05000974842035047, "learning_rate": 1.9812491617643292e-05, "loss": 0.84, "step": 230670 }, { "epoch": 4.049930651872399, "grad_norm": 0.0451623428361975, "learning_rate": 1.9806115134164538e-05, "loss": 0.8349, "step": 230680 }, { "epoch": 4.0501062167524005, "grad_norm": 0.04658477130723671, "learning_rate": 1.9799739679427827e-05, "loss": 0.8342, "step": 230690 }, { "epoch": 4.050281781632402, "grad_norm": 0.05599991882704879, "learning_rate": 1.9793365253513997e-05, "loss": 0.846, "step": 230700 }, { "epoch": 4.050457346512403, "grad_norm": 0.04953797933931095, "learning_rate": 1.978699185650378e-05, "loss": 0.84, "step": 230710 }, { "epoch": 4.050632911392405, "grad_norm": 0.05662331545608743, "learning_rate": 1.9780619488477965e-05, "loss": 0.8406, "step": 230720 }, { "epoch": 4.050808476272406, "grad_norm": 0.05575493405496299, "learning_rate": 1.9774248149517245e-05, "loss": 0.8446, "step": 230730 }, { "epoch": 4.050984041152408, "grad_norm": 0.06226016212259846, "learning_rate": 1.9767877839702363e-05, "loss": 0.8345, "step": 230740 }, { "epoch": 4.051159606032409, "grad_norm": 0.05724124131392584, "learning_rate": 1.9761508559114067e-05, "loss": 0.8455, "step": 230750 }, { "epoch": 4.05133517091241, "grad_norm": 0.045820814214711396, "learning_rate": 1.9755140307833e-05, "loss": 0.8346, "step": 230760 }, { "epoch": 4.051510735792412, "grad_norm": 0.07422352317505747, "learning_rate": 1.9748773085939874e-05, "loss": 0.832, "step": 230770 }, { "epoch": 4.051686300672413, "grad_norm": 0.055134903917068726, "learning_rate": 1.974240689351539e-05, "loss": 0.8356, "step": 230780 }, { "epoch": 4.051861865552415, "grad_norm": 0.04688600175498848, "learning_rate": 1.973604173064017e-05, "loss": 0.8433, "step": 230790 }, { "epoch": 4.052037430432416, "grad_norm": 0.047104197131246084, "learning_rate": 1.972967759739486e-05, "loss": 0.8385, "step": 230800 }, { "epoch": 4.052212995312417, "grad_norm": 0.04483057720051681, "learning_rate": 1.9723314493860113e-05, "loss": 0.8434, "step": 230810 }, { "epoch": 4.052388560192419, "grad_norm": 0.04808688767881454, "learning_rate": 1.9716952420116585e-05, "loss": 0.8419, "step": 230820 }, { "epoch": 4.05256412507242, "grad_norm": 0.04845711607550383, "learning_rate": 1.9710591376244814e-05, "loss": 0.8434, "step": 230830 }, { "epoch": 4.052739689952422, "grad_norm": 0.05978333639268888, "learning_rate": 1.970423136232543e-05, "loss": 0.8436, "step": 230840 }, { "epoch": 4.052915254832423, "grad_norm": 0.04793370218551738, "learning_rate": 1.9697872378439027e-05, "loss": 0.8376, "step": 230850 }, { "epoch": 4.053090819712425, "grad_norm": 0.06817083177106423, "learning_rate": 1.9691514424666156e-05, "loss": 0.8408, "step": 230860 }, { "epoch": 4.053266384592426, "grad_norm": 0.042440060326242195, "learning_rate": 1.9685157501087366e-05, "loss": 0.8423, "step": 230870 }, { "epoch": 4.053441949472427, "grad_norm": 0.053302001603458264, "learning_rate": 1.9678801607783228e-05, "loss": 0.8348, "step": 230880 }, { "epoch": 4.053617514352429, "grad_norm": 0.05105966591051054, "learning_rate": 1.9672446744834276e-05, "loss": 0.8464, "step": 230890 }, { "epoch": 4.05379307923243, "grad_norm": 0.05536884241687365, "learning_rate": 1.966609291232099e-05, "loss": 0.8449, "step": 230900 }, { "epoch": 4.053968644112432, "grad_norm": 0.04680235134077853, "learning_rate": 1.965974011032389e-05, "loss": 0.8439, "step": 230910 }, { "epoch": 4.054144208992433, "grad_norm": 0.052827786558423184, "learning_rate": 1.9653388338923494e-05, "loss": 0.8377, "step": 230920 }, { "epoch": 4.054319773872434, "grad_norm": 0.05925802334627201, "learning_rate": 1.9647037598200236e-05, "loss": 0.8467, "step": 230930 }, { "epoch": 4.054495338752436, "grad_norm": 0.04734261710690397, "learning_rate": 1.964068788823461e-05, "loss": 0.8473, "step": 230940 }, { "epoch": 4.054670903632437, "grad_norm": 0.057233015521233443, "learning_rate": 1.9634339209107074e-05, "loss": 0.8433, "step": 230950 }, { "epoch": 4.054846468512439, "grad_norm": 0.050791406646814384, "learning_rate": 1.9627991560898033e-05, "loss": 0.8336, "step": 230960 }, { "epoch": 4.05502203339244, "grad_norm": 0.0511118396481289, "learning_rate": 1.9621644943687937e-05, "loss": 0.8347, "step": 230970 }, { "epoch": 4.055197598272442, "grad_norm": 0.07123364151921885, "learning_rate": 1.961529935755719e-05, "loss": 0.8365, "step": 230980 }, { "epoch": 4.055373163152443, "grad_norm": 0.04283127142495337, "learning_rate": 1.9608954802586224e-05, "loss": 0.8349, "step": 230990 }, { "epoch": 4.055548728032444, "grad_norm": 0.04446109402751642, "learning_rate": 1.9602611278855384e-05, "loss": 0.8376, "step": 231000 }, { "epoch": 4.055724292912446, "grad_norm": 0.04441492431452363, "learning_rate": 1.959626878644504e-05, "loss": 0.8401, "step": 231010 }, { "epoch": 4.055899857792447, "grad_norm": 0.048440088970209856, "learning_rate": 1.9589927325435616e-05, "loss": 0.8297, "step": 231020 }, { "epoch": 4.056075422672449, "grad_norm": 0.05341169988061649, "learning_rate": 1.9583586895907377e-05, "loss": 0.843, "step": 231030 }, { "epoch": 4.05625098755245, "grad_norm": 0.05490835406372275, "learning_rate": 1.95772474979407e-05, "loss": 0.8429, "step": 231040 }, { "epoch": 4.056426552432451, "grad_norm": 0.07448919092952341, "learning_rate": 1.9570909131615907e-05, "loss": 0.8439, "step": 231050 }, { "epoch": 4.056602117312453, "grad_norm": 0.05444306030800364, "learning_rate": 1.956457179701332e-05, "loss": 0.8443, "step": 231060 }, { "epoch": 4.056777682192454, "grad_norm": 0.0527080771243837, "learning_rate": 1.955823549421319e-05, "loss": 0.8458, "step": 231070 }, { "epoch": 4.056953247072456, "grad_norm": 0.053211752840372786, "learning_rate": 1.955190022329582e-05, "loss": 0.8483, "step": 231080 }, { "epoch": 4.057128811952457, "grad_norm": 0.05544859020631584, "learning_rate": 1.9545565984341513e-05, "loss": 0.8384, "step": 231090 }, { "epoch": 4.057304376832459, "grad_norm": 0.07032927052062285, "learning_rate": 1.9539232777430476e-05, "loss": 0.8377, "step": 231100 }, { "epoch": 4.05747994171246, "grad_norm": 0.05242504132075124, "learning_rate": 1.9532900602642967e-05, "loss": 0.8443, "step": 231110 }, { "epoch": 4.057655506592461, "grad_norm": 0.05028043223345739, "learning_rate": 1.952656946005925e-05, "loss": 0.8395, "step": 231120 }, { "epoch": 4.057831071472463, "grad_norm": 0.052988688941566865, "learning_rate": 1.952023934975948e-05, "loss": 0.8321, "step": 231130 }, { "epoch": 4.058006636352464, "grad_norm": 0.04472317757276596, "learning_rate": 1.951391027182389e-05, "loss": 0.8398, "step": 231140 }, { "epoch": 4.058182201232466, "grad_norm": 0.04369705893361105, "learning_rate": 1.950758222633268e-05, "loss": 0.8495, "step": 231150 }, { "epoch": 4.058357766112467, "grad_norm": 0.06636777723131143, "learning_rate": 1.9501255213366047e-05, "loss": 0.8369, "step": 231160 }, { "epoch": 4.058533330992468, "grad_norm": 0.049957364971525545, "learning_rate": 1.9494929233004105e-05, "loss": 0.8379, "step": 231170 }, { "epoch": 4.05870889587247, "grad_norm": 0.04753568075900819, "learning_rate": 1.948860428532703e-05, "loss": 0.8354, "step": 231180 }, { "epoch": 4.058884460752471, "grad_norm": 0.055880241524895374, "learning_rate": 1.948228037041499e-05, "loss": 0.8439, "step": 231190 }, { "epoch": 4.059060025632473, "grad_norm": 0.047787024129376146, "learning_rate": 1.9475957488348044e-05, "loss": 0.8361, "step": 231200 }, { "epoch": 4.059235590512474, "grad_norm": 0.044636138014736884, "learning_rate": 1.9469635639206342e-05, "loss": 0.8384, "step": 231210 }, { "epoch": 4.059411155392476, "grad_norm": 0.06093169862835277, "learning_rate": 1.9463314823069988e-05, "loss": 0.8349, "step": 231220 }, { "epoch": 4.059586720272477, "grad_norm": 0.05032044317949286, "learning_rate": 1.945699504001908e-05, "loss": 0.8387, "step": 231230 }, { "epoch": 4.059762285152478, "grad_norm": 0.05113714224357784, "learning_rate": 1.9450676290133648e-05, "loss": 0.8394, "step": 231240 }, { "epoch": 4.05993785003248, "grad_norm": 0.06132724645688432, "learning_rate": 1.944435857349379e-05, "loss": 0.8377, "step": 231250 }, { "epoch": 4.060113414912481, "grad_norm": 0.05012891896340847, "learning_rate": 1.943804189017955e-05, "loss": 0.848, "step": 231260 }, { "epoch": 4.060288979792483, "grad_norm": 0.05507952314200843, "learning_rate": 1.9431726240270928e-05, "loss": 0.8421, "step": 231270 }, { "epoch": 4.060464544672484, "grad_norm": 0.03815857092978236, "learning_rate": 1.9425411623847967e-05, "loss": 0.8427, "step": 231280 }, { "epoch": 4.0606401095524856, "grad_norm": 0.062066269186314006, "learning_rate": 1.94190980409907e-05, "loss": 0.8395, "step": 231290 }, { "epoch": 4.060815674432487, "grad_norm": 0.05819735507152214, "learning_rate": 1.9412785491779073e-05, "loss": 0.8331, "step": 231300 }, { "epoch": 4.060991239312488, "grad_norm": 0.05064363702879113, "learning_rate": 1.940647397629309e-05, "loss": 0.8319, "step": 231310 }, { "epoch": 4.0611668041924895, "grad_norm": 0.057143877336356785, "learning_rate": 1.9400163494612716e-05, "loss": 0.836, "step": 231320 }, { "epoch": 4.061342369072491, "grad_norm": 0.05699978276106688, "learning_rate": 1.9393854046817932e-05, "loss": 0.8397, "step": 231330 }, { "epoch": 4.0615179339524925, "grad_norm": 0.0486315362176169, "learning_rate": 1.9387545632988636e-05, "loss": 0.843, "step": 231340 }, { "epoch": 4.0616934988324935, "grad_norm": 0.046700756035497, "learning_rate": 1.9381238253204783e-05, "loss": 0.8452, "step": 231350 }, { "epoch": 4.061869063712495, "grad_norm": 0.06905920618879362, "learning_rate": 1.9374931907546304e-05, "loss": 0.8438, "step": 231360 }, { "epoch": 4.0620446285924965, "grad_norm": 0.05266336593372966, "learning_rate": 1.936862659609305e-05, "loss": 0.8466, "step": 231370 }, { "epoch": 4.0622201934724975, "grad_norm": 0.04652467494411541, "learning_rate": 1.9362322318924943e-05, "loss": 0.8335, "step": 231380 }, { "epoch": 4.062395758352499, "grad_norm": 0.06348950727008061, "learning_rate": 1.935601907612187e-05, "loss": 0.8306, "step": 231390 }, { "epoch": 4.0625713232325005, "grad_norm": 0.051892074666541346, "learning_rate": 1.93497168677637e-05, "loss": 0.8433, "step": 231400 }, { "epoch": 4.062746888112502, "grad_norm": 0.05069974333451811, "learning_rate": 1.934341569393023e-05, "loss": 0.8394, "step": 231410 }, { "epoch": 4.062922452992503, "grad_norm": 0.04401721339856393, "learning_rate": 1.9337115554701348e-05, "loss": 0.8343, "step": 231420 }, { "epoch": 4.0630980178725045, "grad_norm": 0.059747418821036444, "learning_rate": 1.933081645015688e-05, "loss": 0.842, "step": 231430 }, { "epoch": 4.063273582752506, "grad_norm": 0.04549335603673088, "learning_rate": 1.9324518380376598e-05, "loss": 0.8393, "step": 231440 }, { "epoch": 4.063449147632507, "grad_norm": 0.041985999640050925, "learning_rate": 1.931822134544032e-05, "loss": 0.835, "step": 231450 }, { "epoch": 4.063624712512509, "grad_norm": 0.05718866927110598, "learning_rate": 1.9311925345427844e-05, "loss": 0.8389, "step": 231460 }, { "epoch": 4.06380027739251, "grad_norm": 0.051040306491554305, "learning_rate": 1.9305630380418925e-05, "loss": 0.8393, "step": 231470 }, { "epoch": 4.063975842272511, "grad_norm": 0.04285964090931558, "learning_rate": 1.9299336450493313e-05, "loss": 0.8318, "step": 231480 }, { "epoch": 4.064151407152513, "grad_norm": 0.07980636467391183, "learning_rate": 1.929304355573077e-05, "loss": 0.8342, "step": 231490 }, { "epoch": 4.064326972032514, "grad_norm": 0.0491790337690851, "learning_rate": 1.9286751696211052e-05, "loss": 0.8477, "step": 231500 }, { "epoch": 4.064502536912516, "grad_norm": 0.0478867762682142, "learning_rate": 1.928046087201382e-05, "loss": 0.8527, "step": 231510 }, { "epoch": 4.064678101792517, "grad_norm": 0.06223320771924356, "learning_rate": 1.927417108321881e-05, "loss": 0.8422, "step": 231520 }, { "epoch": 4.064853666672519, "grad_norm": 0.036126257059412734, "learning_rate": 1.926788232990574e-05, "loss": 0.8429, "step": 231530 }, { "epoch": 4.06502923155252, "grad_norm": 0.05354549850779721, "learning_rate": 1.9261594612154247e-05, "loss": 0.8369, "step": 231540 }, { "epoch": 4.065204796432521, "grad_norm": 0.062409852748057174, "learning_rate": 1.925530793004401e-05, "loss": 0.8464, "step": 231550 }, { "epoch": 4.065380361312523, "grad_norm": 0.06266499684185736, "learning_rate": 1.924902228365469e-05, "loss": 0.8463, "step": 231560 }, { "epoch": 4.065555926192524, "grad_norm": 0.056758186035428815, "learning_rate": 1.9242737673065945e-05, "loss": 0.8348, "step": 231570 }, { "epoch": 4.065731491072526, "grad_norm": 0.05953266491353704, "learning_rate": 1.9236454098357363e-05, "loss": 0.8405, "step": 231580 }, { "epoch": 4.065907055952527, "grad_norm": 0.04979564114508449, "learning_rate": 1.9230171559608578e-05, "loss": 0.8437, "step": 231590 }, { "epoch": 4.066082620832528, "grad_norm": 0.06348101298652463, "learning_rate": 1.922389005689921e-05, "loss": 0.8335, "step": 231600 }, { "epoch": 4.06625818571253, "grad_norm": 0.04679999363020437, "learning_rate": 1.9217609590308803e-05, "loss": 0.8334, "step": 231610 }, { "epoch": 4.066433750592531, "grad_norm": 0.054355880006412345, "learning_rate": 1.9211330159916963e-05, "loss": 0.8369, "step": 231620 }, { "epoch": 4.066609315472533, "grad_norm": 0.050046379272353385, "learning_rate": 1.920505176580327e-05, "loss": 0.8357, "step": 231630 }, { "epoch": 4.066784880352534, "grad_norm": 0.06168276997051324, "learning_rate": 1.9198774408047208e-05, "loss": 0.8363, "step": 231640 }, { "epoch": 4.066960445232536, "grad_norm": 0.05875192513641352, "learning_rate": 1.919249808672837e-05, "loss": 0.8341, "step": 231650 }, { "epoch": 4.067136010112537, "grad_norm": 0.059584091945962585, "learning_rate": 1.9186222801926237e-05, "loss": 0.8396, "step": 231660 }, { "epoch": 4.067311574992538, "grad_norm": 0.05115839356702313, "learning_rate": 1.9179948553720383e-05, "loss": 0.8516, "step": 231670 }, { "epoch": 4.06748713987254, "grad_norm": 0.07347945363678789, "learning_rate": 1.9173675342190236e-05, "loss": 0.8432, "step": 231680 }, { "epoch": 4.067662704752541, "grad_norm": 0.05432548354276119, "learning_rate": 1.9167403167415297e-05, "loss": 0.837, "step": 231690 }, { "epoch": 4.067838269632543, "grad_norm": 0.07735568184152238, "learning_rate": 1.9161132029475076e-05, "loss": 0.8323, "step": 231700 }, { "epoch": 4.068013834512544, "grad_norm": 0.04785642859154197, "learning_rate": 1.915486192844896e-05, "loss": 0.8372, "step": 231710 }, { "epoch": 4.068189399392545, "grad_norm": 0.05019688843516327, "learning_rate": 1.9148592864416445e-05, "loss": 0.8464, "step": 231720 }, { "epoch": 4.068364964272547, "grad_norm": 0.06852023757778283, "learning_rate": 1.9142324837456935e-05, "loss": 0.8432, "step": 231730 }, { "epoch": 4.068540529152548, "grad_norm": 0.05383735275889387, "learning_rate": 1.9136057847649876e-05, "loss": 0.8367, "step": 231740 }, { "epoch": 4.06871609403255, "grad_norm": 0.06776904770809017, "learning_rate": 1.9129791895074636e-05, "loss": 0.8405, "step": 231750 }, { "epoch": 4.068891658912551, "grad_norm": 0.07389485918866771, "learning_rate": 1.912352697981063e-05, "loss": 0.835, "step": 231760 }, { "epoch": 4.069067223792553, "grad_norm": 0.06272097563954862, "learning_rate": 1.911726310193724e-05, "loss": 0.8431, "step": 231770 }, { "epoch": 4.069242788672554, "grad_norm": 0.04394470434029819, "learning_rate": 1.91110002615338e-05, "loss": 0.8383, "step": 231780 }, { "epoch": 4.069418353552555, "grad_norm": 0.05066793154899464, "learning_rate": 1.910473845867969e-05, "loss": 0.8397, "step": 231790 }, { "epoch": 4.069593918432557, "grad_norm": 0.04458892626304208, "learning_rate": 1.9098477693454246e-05, "loss": 0.8441, "step": 231800 }, { "epoch": 4.069769483312558, "grad_norm": 0.06388760762603107, "learning_rate": 1.9092217965936772e-05, "loss": 0.8379, "step": 231810 }, { "epoch": 4.06994504819256, "grad_norm": 0.04925793942506397, "learning_rate": 1.9085959276206593e-05, "loss": 0.8397, "step": 231820 }, { "epoch": 4.070120613072561, "grad_norm": 0.05650332209594922, "learning_rate": 1.907970162434301e-05, "loss": 0.8418, "step": 231830 }, { "epoch": 4.070296177952562, "grad_norm": 0.04598957616999553, "learning_rate": 1.9073445010425335e-05, "loss": 0.835, "step": 231840 }, { "epoch": 4.070471742832564, "grad_norm": 0.05757685177633966, "learning_rate": 1.9067189434532777e-05, "loss": 0.8433, "step": 231850 }, { "epoch": 4.070647307712565, "grad_norm": 0.06181082364671672, "learning_rate": 1.9060934896744632e-05, "loss": 0.8423, "step": 231860 }, { "epoch": 4.070822872592567, "grad_norm": 0.05721278379345325, "learning_rate": 1.9054681397140176e-05, "loss": 0.8397, "step": 231870 }, { "epoch": 4.070998437472568, "grad_norm": 0.05229045461944193, "learning_rate": 1.9048428935798594e-05, "loss": 0.8402, "step": 231880 }, { "epoch": 4.07117400235257, "grad_norm": 0.05003819099273758, "learning_rate": 1.904217751279911e-05, "loss": 0.8462, "step": 231890 }, { "epoch": 4.071349567232571, "grad_norm": 0.042698616241057766, "learning_rate": 1.9035927128220953e-05, "loss": 0.838, "step": 231900 }, { "epoch": 4.071525132112572, "grad_norm": 0.0619597233991457, "learning_rate": 1.9029677782143324e-05, "loss": 0.8362, "step": 231910 }, { "epoch": 4.071700696992574, "grad_norm": 0.052630695938754676, "learning_rate": 1.9023429474645375e-05, "loss": 0.8313, "step": 231920 }, { "epoch": 4.071876261872575, "grad_norm": 0.07442551294712521, "learning_rate": 1.9017182205806267e-05, "loss": 0.8409, "step": 231930 }, { "epoch": 4.072051826752577, "grad_norm": 0.059212676720308055, "learning_rate": 1.9010935975705214e-05, "loss": 0.8378, "step": 231940 }, { "epoch": 4.072227391632578, "grad_norm": 0.05104594867061404, "learning_rate": 1.9004690784421285e-05, "loss": 0.8383, "step": 231950 }, { "epoch": 4.07240295651258, "grad_norm": 0.05902508270622011, "learning_rate": 1.899844663203363e-05, "loss": 0.8466, "step": 231960 }, { "epoch": 4.072578521392581, "grad_norm": 0.05933065086260977, "learning_rate": 1.899220351862141e-05, "loss": 0.8463, "step": 231970 }, { "epoch": 4.072754086272582, "grad_norm": 0.05802793542237467, "learning_rate": 1.8985961444263655e-05, "loss": 0.842, "step": 231980 }, { "epoch": 4.072929651152584, "grad_norm": 0.057240913729545734, "learning_rate": 1.8979720409039487e-05, "loss": 0.8473, "step": 231990 }, { "epoch": 4.073105216032585, "grad_norm": 0.06780564322840385, "learning_rate": 1.8973480413027982e-05, "loss": 0.8372, "step": 232000 }, { "epoch": 4.073280780912587, "grad_norm": 0.046020965337144594, "learning_rate": 1.896724145630823e-05, "loss": 0.8401, "step": 232010 }, { "epoch": 4.073456345792588, "grad_norm": 0.05337883014248958, "learning_rate": 1.8961003538959223e-05, "loss": 0.8295, "step": 232020 }, { "epoch": 4.073631910672589, "grad_norm": 0.04651664968927143, "learning_rate": 1.8954766661060024e-05, "loss": 0.84, "step": 232030 }, { "epoch": 4.073807475552591, "grad_norm": 0.04571866301597431, "learning_rate": 1.894853082268967e-05, "loss": 0.8413, "step": 232040 }, { "epoch": 4.073983040432592, "grad_norm": 0.055881728330920796, "learning_rate": 1.8942296023927138e-05, "loss": 0.8448, "step": 232050 }, { "epoch": 4.0741586053125936, "grad_norm": 0.060752750090542484, "learning_rate": 1.8936062264851444e-05, "loss": 0.8336, "step": 232060 }, { "epoch": 4.074334170192595, "grad_norm": 0.05917656847292841, "learning_rate": 1.8929829545541564e-05, "loss": 0.8359, "step": 232070 }, { "epoch": 4.0745097350725965, "grad_norm": 0.052800191312936236, "learning_rate": 1.8923597866076506e-05, "loss": 0.8444, "step": 232080 }, { "epoch": 4.0746852999525975, "grad_norm": 0.048801409945758274, "learning_rate": 1.8917367226535156e-05, "loss": 0.8439, "step": 232090 }, { "epoch": 4.074860864832599, "grad_norm": 0.04632996164114891, "learning_rate": 1.89111376269965e-05, "loss": 0.8396, "step": 232100 }, { "epoch": 4.0750364297126005, "grad_norm": 0.045791933478224356, "learning_rate": 1.8904909067539487e-05, "loss": 0.8355, "step": 232110 }, { "epoch": 4.0752119945926015, "grad_norm": 0.05753970192290261, "learning_rate": 1.8898681548242987e-05, "loss": 0.8409, "step": 232120 }, { "epoch": 4.0753875594726034, "grad_norm": 0.06164038044609515, "learning_rate": 1.889245506918593e-05, "loss": 0.8369, "step": 232130 }, { "epoch": 4.0755631243526045, "grad_norm": 0.058160504381626225, "learning_rate": 1.888622963044722e-05, "loss": 0.8425, "step": 232140 }, { "epoch": 4.0757386892326055, "grad_norm": 0.05174820248214816, "learning_rate": 1.8880005232105695e-05, "loss": 0.8418, "step": 232150 }, { "epoch": 4.075914254112607, "grad_norm": 0.04717376238371022, "learning_rate": 1.887378187424024e-05, "loss": 0.8446, "step": 232160 }, { "epoch": 4.0760898189926085, "grad_norm": 0.04291939495297339, "learning_rate": 1.8867559556929712e-05, "loss": 0.8353, "step": 232170 }, { "epoch": 4.07626538387261, "grad_norm": 0.05839313676528621, "learning_rate": 1.8861338280252972e-05, "loss": 0.8342, "step": 232180 }, { "epoch": 4.076440948752611, "grad_norm": 0.060954301816119674, "learning_rate": 1.8855118044288792e-05, "loss": 0.8378, "step": 232190 }, { "epoch": 4.076616513632613, "grad_norm": 0.06137332011720221, "learning_rate": 1.8848898849115997e-05, "loss": 0.8445, "step": 232200 }, { "epoch": 4.076792078512614, "grad_norm": 0.06421750133589402, "learning_rate": 1.8842680694813433e-05, "loss": 0.8385, "step": 232210 }, { "epoch": 4.076967643392615, "grad_norm": 0.057943836089584984, "learning_rate": 1.8836463581459828e-05, "loss": 0.8409, "step": 232220 }, { "epoch": 4.077143208272617, "grad_norm": 0.05570686029253389, "learning_rate": 1.8830247509133974e-05, "loss": 0.8366, "step": 232230 }, { "epoch": 4.077318773152618, "grad_norm": 0.0868491835881463, "learning_rate": 1.8824032477914626e-05, "loss": 0.8477, "step": 232240 }, { "epoch": 4.07749433803262, "grad_norm": 0.04862829005910427, "learning_rate": 1.881781848788057e-05, "loss": 0.8466, "step": 232250 }, { "epoch": 4.077669902912621, "grad_norm": 0.0594013112575181, "learning_rate": 1.8811605539110472e-05, "loss": 0.8392, "step": 232260 }, { "epoch": 4.077845467792622, "grad_norm": 0.058358232688048, "learning_rate": 1.880539363168309e-05, "loss": 0.8422, "step": 232270 }, { "epoch": 4.078021032672624, "grad_norm": 0.04362042424506637, "learning_rate": 1.8799182765677142e-05, "loss": 0.8422, "step": 232280 }, { "epoch": 4.078196597552625, "grad_norm": 0.04689758451234852, "learning_rate": 1.8792972941171275e-05, "loss": 0.8382, "step": 232290 }, { "epoch": 4.078372162432627, "grad_norm": 0.05005749640259704, "learning_rate": 1.8786764158244195e-05, "loss": 0.8376, "step": 232300 }, { "epoch": 4.078547727312628, "grad_norm": 0.053901677462497695, "learning_rate": 1.8780556416974602e-05, "loss": 0.8418, "step": 232310 }, { "epoch": 4.07872329219263, "grad_norm": 0.054742945416316996, "learning_rate": 1.877434971744109e-05, "loss": 0.843, "step": 232320 }, { "epoch": 4.078898857072631, "grad_norm": 0.05518309368364387, "learning_rate": 1.8768144059722322e-05, "loss": 0.8401, "step": 232330 }, { "epoch": 4.079074421952632, "grad_norm": 0.07732217486196705, "learning_rate": 1.8761939443896924e-05, "loss": 0.8303, "step": 232340 }, { "epoch": 4.079249986832634, "grad_norm": 0.05507932173267617, "learning_rate": 1.8755735870043542e-05, "loss": 0.8412, "step": 232350 }, { "epoch": 4.079425551712635, "grad_norm": 0.05008629649150695, "learning_rate": 1.8749533338240728e-05, "loss": 0.8386, "step": 232360 }, { "epoch": 4.079601116592637, "grad_norm": 0.0479942088328177, "learning_rate": 1.8743331848567092e-05, "loss": 0.8336, "step": 232370 }, { "epoch": 4.079776681472638, "grad_norm": 0.039527362526217386, "learning_rate": 1.8737131401101226e-05, "loss": 0.8358, "step": 232380 }, { "epoch": 4.079952246352639, "grad_norm": 0.052550855386714744, "learning_rate": 1.8730931995921643e-05, "loss": 0.846, "step": 232390 }, { "epoch": 4.080127811232641, "grad_norm": 0.06623227345928479, "learning_rate": 1.8724733633106927e-05, "loss": 0.8433, "step": 232400 }, { "epoch": 4.080303376112642, "grad_norm": 0.06683389561589564, "learning_rate": 1.8718536312735598e-05, "loss": 0.8419, "step": 232410 }, { "epoch": 4.080478940992644, "grad_norm": 0.06261289145682652, "learning_rate": 1.8712340034886207e-05, "loss": 0.8467, "step": 232420 }, { "epoch": 4.080654505872645, "grad_norm": 0.050628349357312856, "learning_rate": 1.870614479963722e-05, "loss": 0.848, "step": 232430 }, { "epoch": 4.080830070752647, "grad_norm": 0.058978914059611046, "learning_rate": 1.8699950607067138e-05, "loss": 0.8346, "step": 232440 }, { "epoch": 4.081005635632648, "grad_norm": 0.05828792841724674, "learning_rate": 1.869375745725449e-05, "loss": 0.8465, "step": 232450 }, { "epoch": 4.081181200512649, "grad_norm": 0.057343245315436095, "learning_rate": 1.868756535027768e-05, "loss": 0.8401, "step": 232460 }, { "epoch": 4.081356765392651, "grad_norm": 0.07598158295073791, "learning_rate": 1.868137428621519e-05, "loss": 0.8355, "step": 232470 }, { "epoch": 4.081532330272652, "grad_norm": 0.05277949078841818, "learning_rate": 1.8675184265145497e-05, "loss": 0.8389, "step": 232480 }, { "epoch": 4.081707895152654, "grad_norm": 0.061109132932524395, "learning_rate": 1.866899528714696e-05, "loss": 0.8428, "step": 232490 }, { "epoch": 4.081883460032655, "grad_norm": 0.0646947099650042, "learning_rate": 1.8662807352298044e-05, "loss": 0.8422, "step": 232500 }, { "epoch": 4.082059024912656, "grad_norm": 0.04574593908837088, "learning_rate": 1.8656620460677125e-05, "loss": 0.8423, "step": 232510 }, { "epoch": 4.082234589792658, "grad_norm": 0.05243665555481325, "learning_rate": 1.8650434612362645e-05, "loss": 0.8389, "step": 232520 }, { "epoch": 4.082410154672659, "grad_norm": 0.06353797709150275, "learning_rate": 1.8644249807432908e-05, "loss": 0.8359, "step": 232530 }, { "epoch": 4.082585719552661, "grad_norm": 0.046949574881012666, "learning_rate": 1.8638066045966314e-05, "loss": 0.8387, "step": 232540 }, { "epoch": 4.082761284432662, "grad_norm": 0.04382628422790846, "learning_rate": 1.8631883328041228e-05, "loss": 0.8346, "step": 232550 }, { "epoch": 4.082936849312664, "grad_norm": 0.06842379076502902, "learning_rate": 1.8625701653735942e-05, "loss": 0.8481, "step": 232560 }, { "epoch": 4.083112414192665, "grad_norm": 0.05815154628723508, "learning_rate": 1.86195210231288e-05, "loss": 0.8367, "step": 232570 }, { "epoch": 4.083287979072666, "grad_norm": 0.05337918638360118, "learning_rate": 1.861334143629811e-05, "loss": 0.8348, "step": 232580 }, { "epoch": 4.083463543952668, "grad_norm": 0.049806083314725386, "learning_rate": 1.860716289332221e-05, "loss": 0.8508, "step": 232590 }, { "epoch": 4.083639108832669, "grad_norm": 0.04587333145666694, "learning_rate": 1.860098539427932e-05, "loss": 0.8363, "step": 232600 }, { "epoch": 4.083814673712671, "grad_norm": 0.04681498222881848, "learning_rate": 1.859480893924774e-05, "loss": 0.8394, "step": 232610 }, { "epoch": 4.083990238592672, "grad_norm": 0.04701641783079446, "learning_rate": 1.8588633528305742e-05, "loss": 0.834, "step": 232620 }, { "epoch": 4.084165803472674, "grad_norm": 0.04461893529935489, "learning_rate": 1.8582459161531526e-05, "loss": 0.8361, "step": 232630 }, { "epoch": 4.084341368352675, "grad_norm": 0.06908147253317377, "learning_rate": 1.8576285839003352e-05, "loss": 0.8423, "step": 232640 }, { "epoch": 4.084516933232676, "grad_norm": 0.05594422573152666, "learning_rate": 1.857011356079945e-05, "loss": 0.8492, "step": 232650 }, { "epoch": 4.084692498112678, "grad_norm": 0.04824984270621321, "learning_rate": 1.8563942326997986e-05, "loss": 0.8494, "step": 232660 }, { "epoch": 4.084868062992679, "grad_norm": 0.045889138582903714, "learning_rate": 1.8557772137677176e-05, "loss": 0.8406, "step": 232670 }, { "epoch": 4.085043627872681, "grad_norm": 0.04115440940883448, "learning_rate": 1.8551602992915188e-05, "loss": 0.8368, "step": 232680 }, { "epoch": 4.085219192752682, "grad_norm": 0.049671463394112, "learning_rate": 1.8545434892790223e-05, "loss": 0.8464, "step": 232690 }, { "epoch": 4.085394757632683, "grad_norm": 0.05321915096616634, "learning_rate": 1.8539267837380367e-05, "loss": 0.8399, "step": 232700 }, { "epoch": 4.085570322512685, "grad_norm": 0.059352757062620665, "learning_rate": 1.8533101826763798e-05, "loss": 0.8393, "step": 232710 }, { "epoch": 4.085745887392686, "grad_norm": 0.04195227087016098, "learning_rate": 1.8526936861018653e-05, "loss": 0.8413, "step": 232720 }, { "epoch": 4.085921452272688, "grad_norm": 0.046384972702589164, "learning_rate": 1.8520772940223e-05, "loss": 0.8382, "step": 232730 }, { "epoch": 4.086097017152689, "grad_norm": 0.06193230963292315, "learning_rate": 1.8514610064454953e-05, "loss": 0.8351, "step": 232740 }, { "epoch": 4.086272582032691, "grad_norm": 0.053881072799145745, "learning_rate": 1.8508448233792622e-05, "loss": 0.8422, "step": 232750 }, { "epoch": 4.086448146912692, "grad_norm": 0.06667844116391637, "learning_rate": 1.8502287448314073e-05, "loss": 0.837, "step": 232760 }, { "epoch": 4.086623711792693, "grad_norm": 0.0406441501081758, "learning_rate": 1.8496127708097333e-05, "loss": 0.8405, "step": 232770 }, { "epoch": 4.086799276672695, "grad_norm": 0.050888615194887056, "learning_rate": 1.848996901322047e-05, "loss": 0.8409, "step": 232780 }, { "epoch": 4.086974841552696, "grad_norm": 0.05202353192285021, "learning_rate": 1.848381136376153e-05, "loss": 0.8412, "step": 232790 }, { "epoch": 4.087150406432698, "grad_norm": 0.050114849978260045, "learning_rate": 1.8477654759798496e-05, "loss": 0.8459, "step": 232800 }, { "epoch": 4.087325971312699, "grad_norm": 0.04268633354325176, "learning_rate": 1.847149920140939e-05, "loss": 0.8355, "step": 232810 }, { "epoch": 4.0875015361927, "grad_norm": 0.05794187224054888, "learning_rate": 1.8465344688672224e-05, "loss": 0.8281, "step": 232820 }, { "epoch": 4.0876771010727015, "grad_norm": 0.04589682445696792, "learning_rate": 1.845919122166494e-05, "loss": 0.8334, "step": 232830 }, { "epoch": 4.087852665952703, "grad_norm": 0.04977059520610898, "learning_rate": 1.845303880046552e-05, "loss": 0.8442, "step": 232840 }, { "epoch": 4.0880282308327045, "grad_norm": 0.049849714828336264, "learning_rate": 1.8446887425151914e-05, "loss": 0.8412, "step": 232850 }, { "epoch": 4.0882037957127055, "grad_norm": 0.04256013520777272, "learning_rate": 1.8440737095802102e-05, "loss": 0.8374, "step": 232860 }, { "epoch": 4.0883793605927075, "grad_norm": 0.054472712948363894, "learning_rate": 1.8434587812493938e-05, "loss": 0.827, "step": 232870 }, { "epoch": 4.0885549254727085, "grad_norm": 0.0436622389598755, "learning_rate": 1.842843957530538e-05, "loss": 0.8402, "step": 232880 }, { "epoch": 4.0887304903527095, "grad_norm": 0.048621296147271925, "learning_rate": 1.842229238431433e-05, "loss": 0.8387, "step": 232890 }, { "epoch": 4.0889060552327114, "grad_norm": 0.0695110546441136, "learning_rate": 1.8416146239598638e-05, "loss": 0.8333, "step": 232900 }, { "epoch": 4.0890816201127125, "grad_norm": 0.042853287202203785, "learning_rate": 1.8410001141236213e-05, "loss": 0.8397, "step": 232910 }, { "epoch": 4.089257184992714, "grad_norm": 0.050323678413430814, "learning_rate": 1.8403857089304887e-05, "loss": 0.8442, "step": 232920 }, { "epoch": 4.089432749872715, "grad_norm": 0.052115623178527325, "learning_rate": 1.8397714083882554e-05, "loss": 0.8383, "step": 232930 }, { "epoch": 4.0896083147527165, "grad_norm": 0.0413142443974431, "learning_rate": 1.8391572125046985e-05, "loss": 0.8435, "step": 232940 }, { "epoch": 4.089783879632718, "grad_norm": 0.048707530023586014, "learning_rate": 1.8385431212876032e-05, "loss": 0.8365, "step": 232950 }, { "epoch": 4.089959444512719, "grad_norm": 0.050119646423522936, "learning_rate": 1.837929134744753e-05, "loss": 0.8352, "step": 232960 }, { "epoch": 4.090135009392721, "grad_norm": 0.0468437683838989, "learning_rate": 1.837315252883921e-05, "loss": 0.8387, "step": 232970 }, { "epoch": 4.090310574272722, "grad_norm": 0.05024785641494346, "learning_rate": 1.8367014757128884e-05, "loss": 0.8457, "step": 232980 }, { "epoch": 4.090486139152724, "grad_norm": 0.043589268848240256, "learning_rate": 1.8360878032394337e-05, "loss": 0.8319, "step": 232990 }, { "epoch": 4.090661704032725, "grad_norm": 0.048507727923426024, "learning_rate": 1.8354742354713275e-05, "loss": 0.8396, "step": 233000 }, { "epoch": 4.090837268912726, "grad_norm": 0.05299516331342044, "learning_rate": 1.8348607724163474e-05, "loss": 0.8373, "step": 233010 }, { "epoch": 4.091012833792728, "grad_norm": 0.04996041171477614, "learning_rate": 1.834247414082266e-05, "loss": 0.8353, "step": 233020 }, { "epoch": 4.091188398672729, "grad_norm": 0.04704166398782162, "learning_rate": 1.8336341604768556e-05, "loss": 0.8356, "step": 233030 }, { "epoch": 4.091363963552731, "grad_norm": 0.041295761164709274, "learning_rate": 1.8330210116078815e-05, "loss": 0.8451, "step": 233040 }, { "epoch": 4.091539528432732, "grad_norm": 0.05391981670961067, "learning_rate": 1.832407967483116e-05, "loss": 0.8445, "step": 233050 }, { "epoch": 4.091715093312733, "grad_norm": 0.08687075077461112, "learning_rate": 1.83179502811033e-05, "loss": 0.8421, "step": 233060 }, { "epoch": 4.091890658192735, "grad_norm": 0.047478101534727306, "learning_rate": 1.8311821934972813e-05, "loss": 0.8309, "step": 233070 }, { "epoch": 4.092066223072736, "grad_norm": 0.05650383025850332, "learning_rate": 1.8305694636517393e-05, "loss": 0.8433, "step": 233080 }, { "epoch": 4.092241787952738, "grad_norm": 0.050116071210993575, "learning_rate": 1.8299568385814677e-05, "loss": 0.844, "step": 233090 }, { "epoch": 4.092417352832739, "grad_norm": 0.0583513054486585, "learning_rate": 1.8293443182942296e-05, "loss": 0.8389, "step": 233100 }, { "epoch": 4.092592917712741, "grad_norm": 0.048428652444198764, "learning_rate": 1.8287319027977826e-05, "loss": 0.8322, "step": 233110 }, { "epoch": 4.092768482592742, "grad_norm": 0.04560649728051521, "learning_rate": 1.8281195920998863e-05, "loss": 0.8347, "step": 233120 }, { "epoch": 4.092944047472743, "grad_norm": 0.059023708291281646, "learning_rate": 1.8275073862083035e-05, "loss": 0.8437, "step": 233130 }, { "epoch": 4.093119612352745, "grad_norm": 0.04514433272381797, "learning_rate": 1.8268952851307847e-05, "loss": 0.8448, "step": 233140 }, { "epoch": 4.093295177232746, "grad_norm": 0.061180280336695765, "learning_rate": 1.8262832888750883e-05, "loss": 0.8383, "step": 233150 }, { "epoch": 4.093470742112748, "grad_norm": 0.047405310107196195, "learning_rate": 1.8256713974489698e-05, "loss": 0.8423, "step": 233160 }, { "epoch": 4.093646306992749, "grad_norm": 0.0644385639552445, "learning_rate": 1.8250596108601787e-05, "loss": 0.8343, "step": 233170 }, { "epoch": 4.09382187187275, "grad_norm": 0.06077066463067004, "learning_rate": 1.8244479291164686e-05, "loss": 0.8316, "step": 233180 }, { "epoch": 4.093997436752752, "grad_norm": 0.0668470530430684, "learning_rate": 1.8238363522255897e-05, "loss": 0.8433, "step": 233190 }, { "epoch": 4.094173001632753, "grad_norm": 0.05844814558590147, "learning_rate": 1.823224880195291e-05, "loss": 0.8389, "step": 233200 }, { "epoch": 4.094348566512755, "grad_norm": 0.05430920236008408, "learning_rate": 1.8226135130333177e-05, "loss": 0.843, "step": 233210 }, { "epoch": 4.094524131392756, "grad_norm": 0.0545037678603426, "learning_rate": 1.822002250747418e-05, "loss": 0.8421, "step": 233220 }, { "epoch": 4.094699696272758, "grad_norm": 0.055962272566363734, "learning_rate": 1.8213910933453384e-05, "loss": 0.8337, "step": 233230 }, { "epoch": 4.094875261152759, "grad_norm": 0.06411045179098769, "learning_rate": 1.820780040834818e-05, "loss": 0.8332, "step": 233240 }, { "epoch": 4.09505082603276, "grad_norm": 0.05588211793486126, "learning_rate": 1.8201690932236013e-05, "loss": 0.8432, "step": 233250 }, { "epoch": 4.095226390912762, "grad_norm": 0.050076013843710736, "learning_rate": 1.8195582505194285e-05, "loss": 0.8463, "step": 233260 }, { "epoch": 4.095401955792763, "grad_norm": 0.05973457974885507, "learning_rate": 1.8189475127300427e-05, "loss": 0.8486, "step": 233270 }, { "epoch": 4.095577520672765, "grad_norm": 0.0690430328011517, "learning_rate": 1.8183368798631752e-05, "loss": 0.8379, "step": 233280 }, { "epoch": 4.095753085552766, "grad_norm": 0.05396149575681845, "learning_rate": 1.8177263519265686e-05, "loss": 0.8357, "step": 233290 }, { "epoch": 4.095928650432767, "grad_norm": 0.055837088435569504, "learning_rate": 1.8171159289279567e-05, "loss": 0.8454, "step": 233300 }, { "epoch": 4.096104215312769, "grad_norm": 0.06239318670214508, "learning_rate": 1.816505610875073e-05, "loss": 0.8399, "step": 233310 }, { "epoch": 4.09627978019277, "grad_norm": 0.05925679994062814, "learning_rate": 1.8158953977756492e-05, "loss": 0.8403, "step": 233320 }, { "epoch": 4.096455345072772, "grad_norm": 0.05620066996172539, "learning_rate": 1.815285289637421e-05, "loss": 0.8358, "step": 233330 }, { "epoch": 4.096630909952773, "grad_norm": 0.05733214838478868, "learning_rate": 1.8146752864681142e-05, "loss": 0.8399, "step": 233340 }, { "epoch": 4.096806474832775, "grad_norm": 0.06833194199768243, "learning_rate": 1.8140653882754592e-05, "loss": 0.8435, "step": 233350 }, { "epoch": 4.096982039712776, "grad_norm": 0.05346964513928917, "learning_rate": 1.8134555950671837e-05, "loss": 0.8327, "step": 233360 }, { "epoch": 4.097157604592777, "grad_norm": 0.060917166625946643, "learning_rate": 1.812845906851016e-05, "loss": 0.8451, "step": 233370 }, { "epoch": 4.097333169472779, "grad_norm": 0.04689440849062854, "learning_rate": 1.812236323634677e-05, "loss": 0.8441, "step": 233380 }, { "epoch": 4.09750873435278, "grad_norm": 0.05435368174059268, "learning_rate": 1.8116268454258918e-05, "loss": 0.8371, "step": 233390 }, { "epoch": 4.097684299232782, "grad_norm": 0.04252032563831401, "learning_rate": 1.8110174722323857e-05, "loss": 0.8398, "step": 233400 }, { "epoch": 4.097859864112783, "grad_norm": 0.05048437645936816, "learning_rate": 1.8104082040618732e-05, "loss": 0.8414, "step": 233410 }, { "epoch": 4.098035428992784, "grad_norm": 0.052211778602426456, "learning_rate": 1.8097990409220785e-05, "loss": 0.8401, "step": 233420 }, { "epoch": 4.098210993872786, "grad_norm": 0.0470390501502205, "learning_rate": 1.8091899828207182e-05, "loss": 0.8387, "step": 233430 }, { "epoch": 4.098386558752787, "grad_norm": 0.049902327940477664, "learning_rate": 1.8085810297655122e-05, "loss": 0.8333, "step": 233440 }, { "epoch": 4.098562123632789, "grad_norm": 0.04350708021272303, "learning_rate": 1.8079721817641717e-05, "loss": 0.8462, "step": 233450 }, { "epoch": 4.09873768851279, "grad_norm": 0.04791241245860213, "learning_rate": 1.8073634388244123e-05, "loss": 0.8388, "step": 233460 }, { "epoch": 4.098913253392792, "grad_norm": 0.046086679511281065, "learning_rate": 1.8067548009539503e-05, "loss": 0.8392, "step": 233470 }, { "epoch": 4.099088818272793, "grad_norm": 0.04332022657037043, "learning_rate": 1.8061462681604915e-05, "loss": 0.846, "step": 233480 }, { "epoch": 4.099264383152794, "grad_norm": 0.07218755134070327, "learning_rate": 1.8055378404517494e-05, "loss": 0.8346, "step": 233490 }, { "epoch": 4.099439948032796, "grad_norm": 0.059148540645545485, "learning_rate": 1.8049295178354354e-05, "loss": 0.8446, "step": 233500 }, { "epoch": 4.099615512912797, "grad_norm": 0.08229480191953949, "learning_rate": 1.8043213003192507e-05, "loss": 0.831, "step": 233510 }, { "epoch": 4.099791077792799, "grad_norm": 0.06175842020667579, "learning_rate": 1.8037131879109054e-05, "loss": 0.8481, "step": 233520 }, { "epoch": 4.0999666426728, "grad_norm": 0.05066399644918555, "learning_rate": 1.8031051806181036e-05, "loss": 0.8412, "step": 233530 }, { "epoch": 4.100142207552802, "grad_norm": 0.05056323547815906, "learning_rate": 1.8024972784485518e-05, "loss": 0.8303, "step": 233540 }, { "epoch": 4.100317772432803, "grad_norm": 0.04707043362493295, "learning_rate": 1.801889481409948e-05, "loss": 0.837, "step": 233550 }, { "epoch": 4.100493337312804, "grad_norm": 0.05751873262031128, "learning_rate": 1.8012817895099944e-05, "loss": 0.8422, "step": 233560 }, { "epoch": 4.1006689021928056, "grad_norm": 0.044505001094348705, "learning_rate": 1.8006742027563933e-05, "loss": 0.8416, "step": 233570 }, { "epoch": 4.100844467072807, "grad_norm": 0.05060607732185944, "learning_rate": 1.8000667211568378e-05, "loss": 0.8405, "step": 233580 }, { "epoch": 4.1010200319528085, "grad_norm": 0.048849565985029646, "learning_rate": 1.799459344719028e-05, "loss": 0.8346, "step": 233590 }, { "epoch": 4.1011955968328095, "grad_norm": 0.05188911678409261, "learning_rate": 1.7988520734506592e-05, "loss": 0.8436, "step": 233600 }, { "epoch": 4.101371161712811, "grad_norm": 0.05417765173245984, "learning_rate": 1.7982449073594278e-05, "loss": 0.8426, "step": 233610 }, { "epoch": 4.1015467265928125, "grad_norm": 0.05405200495902855, "learning_rate": 1.7976378464530225e-05, "loss": 0.8327, "step": 233620 }, { "epoch": 4.1017222914728135, "grad_norm": 0.04295404103962365, "learning_rate": 1.7970308907391365e-05, "loss": 0.8554, "step": 233630 }, { "epoch": 4.1018978563528155, "grad_norm": 0.05797015858269802, "learning_rate": 1.796424040225462e-05, "loss": 0.8371, "step": 233640 }, { "epoch": 4.1020734212328165, "grad_norm": 0.06056425478223552, "learning_rate": 1.7958172949196847e-05, "loss": 0.8308, "step": 233650 }, { "epoch": 4.102248986112818, "grad_norm": 0.05716513569091293, "learning_rate": 1.795210654829494e-05, "loss": 0.8416, "step": 233660 }, { "epoch": 4.102424550992819, "grad_norm": 0.07438481216184158, "learning_rate": 1.794604119962578e-05, "loss": 0.843, "step": 233670 }, { "epoch": 4.1026001158728205, "grad_norm": 0.05543868750553875, "learning_rate": 1.7939976903266172e-05, "loss": 0.8385, "step": 233680 }, { "epoch": 4.102775680752822, "grad_norm": 0.06460047541232235, "learning_rate": 1.7933913659292967e-05, "loss": 0.8462, "step": 233690 }, { "epoch": 4.102951245632823, "grad_norm": 0.05365922563709247, "learning_rate": 1.792785146778301e-05, "loss": 0.8349, "step": 233700 }, { "epoch": 4.103126810512825, "grad_norm": 0.05513468112755364, "learning_rate": 1.7921790328813106e-05, "loss": 0.8331, "step": 233710 }, { "epoch": 4.103302375392826, "grad_norm": 0.054282590587864464, "learning_rate": 1.791573024246002e-05, "loss": 0.8384, "step": 233720 }, { "epoch": 4.103477940272827, "grad_norm": 0.048216211684415304, "learning_rate": 1.790967120880056e-05, "loss": 0.8406, "step": 233730 }, { "epoch": 4.103653505152829, "grad_norm": 0.06014352041070765, "learning_rate": 1.7903613227911506e-05, "loss": 0.8389, "step": 233740 }, { "epoch": 4.10382907003283, "grad_norm": 0.0507279258358342, "learning_rate": 1.7897556299869585e-05, "loss": 0.8423, "step": 233750 }, { "epoch": 4.104004634912832, "grad_norm": 0.09378649066607059, "learning_rate": 1.789150042475155e-05, "loss": 0.8394, "step": 233760 }, { "epoch": 4.104180199792833, "grad_norm": 0.052986146349155765, "learning_rate": 1.7885445602634123e-05, "loss": 0.8439, "step": 233770 }, { "epoch": 4.104355764672835, "grad_norm": 0.045694626288392264, "learning_rate": 1.7879391833594057e-05, "loss": 0.8433, "step": 233780 }, { "epoch": 4.104531329552836, "grad_norm": 0.05664677560046383, "learning_rate": 1.7873339117708002e-05, "loss": 0.8383, "step": 233790 }, { "epoch": 4.104706894432837, "grad_norm": 0.05997197434309478, "learning_rate": 1.786728745505267e-05, "loss": 0.8417, "step": 233800 }, { "epoch": 4.104882459312839, "grad_norm": 0.049627609131123046, "learning_rate": 1.786123684570477e-05, "loss": 0.8439, "step": 233810 }, { "epoch": 4.10505802419284, "grad_norm": 0.066192475515816, "learning_rate": 1.78551872897409e-05, "loss": 0.8274, "step": 233820 }, { "epoch": 4.105233589072842, "grad_norm": 0.0544530215305201, "learning_rate": 1.7849138787237746e-05, "loss": 0.8362, "step": 233830 }, { "epoch": 4.105409153952843, "grad_norm": 0.05465602413923256, "learning_rate": 1.7843091338271966e-05, "loss": 0.8394, "step": 233840 }, { "epoch": 4.105584718832844, "grad_norm": 0.05220597570596629, "learning_rate": 1.7837044942920125e-05, "loss": 0.8332, "step": 233850 }, { "epoch": 4.105760283712846, "grad_norm": 0.06344808978619919, "learning_rate": 1.783099960125887e-05, "loss": 0.8263, "step": 233860 }, { "epoch": 4.105935848592847, "grad_norm": 0.06459812064634493, "learning_rate": 1.782495531336479e-05, "loss": 0.8375, "step": 233870 }, { "epoch": 4.106111413472849, "grad_norm": 0.04546656478326062, "learning_rate": 1.7818912079314477e-05, "loss": 0.8501, "step": 233880 }, { "epoch": 4.10628697835285, "grad_norm": 0.06021879491841797, "learning_rate": 1.7812869899184483e-05, "loss": 0.8361, "step": 233890 }, { "epoch": 4.106462543232852, "grad_norm": 0.04581336406265435, "learning_rate": 1.7806828773051355e-05, "loss": 0.8456, "step": 233900 }, { "epoch": 4.106638108112853, "grad_norm": 0.044714838561385986, "learning_rate": 1.7800788700991685e-05, "loss": 0.8414, "step": 233910 }, { "epoch": 4.106813672992854, "grad_norm": 0.05861089296421893, "learning_rate": 1.7794749683081943e-05, "loss": 0.8499, "step": 233920 }, { "epoch": 4.106989237872856, "grad_norm": 0.05955066406818617, "learning_rate": 1.778871171939866e-05, "loss": 0.8423, "step": 233930 }, { "epoch": 4.107164802752857, "grad_norm": 0.05133327553972502, "learning_rate": 1.7782674810018353e-05, "loss": 0.8327, "step": 233940 }, { "epoch": 4.107340367632859, "grad_norm": 0.06403013990216798, "learning_rate": 1.7776638955017528e-05, "loss": 0.8408, "step": 233950 }, { "epoch": 4.10751593251286, "grad_norm": 0.04812748674418775, "learning_rate": 1.777060415447261e-05, "loss": 0.8507, "step": 233960 }, { "epoch": 4.107691497392861, "grad_norm": 0.07134291456895304, "learning_rate": 1.776457040846009e-05, "loss": 0.838, "step": 233970 }, { "epoch": 4.107867062272863, "grad_norm": 0.0411324117555695, "learning_rate": 1.7758537717056447e-05, "loss": 0.8425, "step": 233980 }, { "epoch": 4.108042627152864, "grad_norm": 0.04225511073102169, "learning_rate": 1.7752506080338046e-05, "loss": 0.8353, "step": 233990 }, { "epoch": 4.108218192032866, "grad_norm": 0.06173432864353543, "learning_rate": 1.7746475498381364e-05, "loss": 0.8409, "step": 234000 }, { "epoch": 4.108393756912867, "grad_norm": 0.04523980915434045, "learning_rate": 1.77404459712628e-05, "loss": 0.8329, "step": 234010 }, { "epoch": 4.108569321792869, "grad_norm": 0.06390683259745612, "learning_rate": 1.7734417499058735e-05, "loss": 0.832, "step": 234020 }, { "epoch": 4.10874488667287, "grad_norm": 0.05314354979162781, "learning_rate": 1.772839008184554e-05, "loss": 0.8342, "step": 234030 }, { "epoch": 4.108920451552871, "grad_norm": 0.05189840103745334, "learning_rate": 1.7722363719699613e-05, "loss": 0.8356, "step": 234040 }, { "epoch": 4.109096016432873, "grad_norm": 0.04960757241282705, "learning_rate": 1.7716338412697314e-05, "loss": 0.8446, "step": 234050 }, { "epoch": 4.109271581312874, "grad_norm": 0.05154251328881215, "learning_rate": 1.771031416091496e-05, "loss": 0.8427, "step": 234060 }, { "epoch": 4.109447146192876, "grad_norm": 0.050989653230786545, "learning_rate": 1.770429096442887e-05, "loss": 0.8426, "step": 234070 }, { "epoch": 4.109622711072877, "grad_norm": 0.05183205824271167, "learning_rate": 1.769826882331541e-05, "loss": 0.8395, "step": 234080 }, { "epoch": 4.109798275952878, "grad_norm": 0.05700269545847513, "learning_rate": 1.7692247737650817e-05, "loss": 0.8389, "step": 234090 }, { "epoch": 4.10997384083288, "grad_norm": 0.04536213258798704, "learning_rate": 1.768622770751143e-05, "loss": 0.8381, "step": 234100 }, { "epoch": 4.110149405712881, "grad_norm": 0.05705601102265365, "learning_rate": 1.768020873297349e-05, "loss": 0.8413, "step": 234110 }, { "epoch": 4.110324970592883, "grad_norm": 0.06203536652009816, "learning_rate": 1.7674190814113297e-05, "loss": 0.8344, "step": 234120 }, { "epoch": 4.110500535472884, "grad_norm": 0.046688362270438946, "learning_rate": 1.766817395100705e-05, "loss": 0.8363, "step": 234130 }, { "epoch": 4.110676100352886, "grad_norm": 0.0636845408943228, "learning_rate": 1.7662158143731015e-05, "loss": 0.8448, "step": 234140 }, { "epoch": 4.110851665232887, "grad_norm": 0.053320385749331484, "learning_rate": 1.7656143392361432e-05, "loss": 0.8322, "step": 234150 }, { "epoch": 4.111027230112888, "grad_norm": 0.04568045631908342, "learning_rate": 1.765012969697446e-05, "loss": 0.8423, "step": 234160 }, { "epoch": 4.11120279499289, "grad_norm": 0.049795361241490636, "learning_rate": 1.7644117057646324e-05, "loss": 0.8417, "step": 234170 }, { "epoch": 4.111378359872891, "grad_norm": 0.04883126281315454, "learning_rate": 1.7638105474453213e-05, "loss": 0.8322, "step": 234180 }, { "epoch": 4.111553924752893, "grad_norm": 0.07636779491731244, "learning_rate": 1.7632094947471265e-05, "loss": 0.8454, "step": 234190 }, { "epoch": 4.111729489632894, "grad_norm": 0.06237136436762392, "learning_rate": 1.7626085476776648e-05, "loss": 0.8363, "step": 234200 }, { "epoch": 4.111905054512896, "grad_norm": 0.051231858687043406, "learning_rate": 1.762007706244551e-05, "loss": 0.8394, "step": 234210 }, { "epoch": 4.112080619392897, "grad_norm": 0.04741821374448961, "learning_rate": 1.7614069704554003e-05, "loss": 0.8453, "step": 234220 }, { "epoch": 4.112256184272898, "grad_norm": 0.04320414358673438, "learning_rate": 1.7608063403178186e-05, "loss": 0.8392, "step": 234230 }, { "epoch": 4.1124317491529, "grad_norm": 0.06259767405206428, "learning_rate": 1.760205815839418e-05, "loss": 0.8339, "step": 234240 }, { "epoch": 4.112607314032901, "grad_norm": 0.06112520899029145, "learning_rate": 1.759605397027811e-05, "loss": 0.8401, "step": 234250 }, { "epoch": 4.112782878912903, "grad_norm": 0.06361678946433064, "learning_rate": 1.7590050838906e-05, "loss": 0.8364, "step": 234260 }, { "epoch": 4.112958443792904, "grad_norm": 0.04923098733482669, "learning_rate": 1.7584048764353923e-05, "loss": 0.8393, "step": 234270 }, { "epoch": 4.113134008672905, "grad_norm": 0.05317518465812162, "learning_rate": 1.757804774669795e-05, "loss": 0.8396, "step": 234280 }, { "epoch": 4.113309573552907, "grad_norm": 0.0564742776975935, "learning_rate": 1.757204778601411e-05, "loss": 0.844, "step": 234290 }, { "epoch": 4.113485138432908, "grad_norm": 0.04447742852524688, "learning_rate": 1.7566048882378394e-05, "loss": 0.8417, "step": 234300 }, { "epoch": 4.11366070331291, "grad_norm": 0.06251673754193275, "learning_rate": 1.7560051035866825e-05, "loss": 0.8371, "step": 234310 }, { "epoch": 4.113836268192911, "grad_norm": 0.05707089925164556, "learning_rate": 1.755405424655543e-05, "loss": 0.846, "step": 234320 }, { "epoch": 4.1140118330729125, "grad_norm": 0.04883766353868733, "learning_rate": 1.7548058514520136e-05, "loss": 0.8356, "step": 234330 }, { "epoch": 4.1141873979529135, "grad_norm": 0.074588182953143, "learning_rate": 1.7542063839836923e-05, "loss": 0.8321, "step": 234340 }, { "epoch": 4.114362962832915, "grad_norm": 0.04886197662497424, "learning_rate": 1.7536070222581787e-05, "loss": 0.8477, "step": 234350 }, { "epoch": 4.1145385277129165, "grad_norm": 0.05149736686885737, "learning_rate": 1.7530077662830624e-05, "loss": 0.845, "step": 234360 }, { "epoch": 4.1147140925929175, "grad_norm": 0.07031067814553756, "learning_rate": 1.7524086160659357e-05, "loss": 0.839, "step": 234370 }, { "epoch": 4.1148896574729195, "grad_norm": 0.05983683438461239, "learning_rate": 1.7518095716143925e-05, "loss": 0.8412, "step": 234380 }, { "epoch": 4.1150652223529205, "grad_norm": 0.05093948577966753, "learning_rate": 1.7512106329360245e-05, "loss": 0.8415, "step": 234390 }, { "epoch": 4.1152407872329215, "grad_norm": 0.06099773981430376, "learning_rate": 1.7506118000384144e-05, "loss": 0.8367, "step": 234400 }, { "epoch": 4.1154163521129234, "grad_norm": 0.055041589226949336, "learning_rate": 1.7500130729291543e-05, "loss": 0.8373, "step": 234410 }, { "epoch": 4.1155919169929245, "grad_norm": 0.052742809543159755, "learning_rate": 1.749414451615831e-05, "loss": 0.8444, "step": 234420 }, { "epoch": 4.115767481872926, "grad_norm": 0.04600618092886495, "learning_rate": 1.7488159361060243e-05, "loss": 0.84, "step": 234430 }, { "epoch": 4.115943046752927, "grad_norm": 0.05186259818202852, "learning_rate": 1.7482175264073196e-05, "loss": 0.8406, "step": 234440 }, { "epoch": 4.116118611632929, "grad_norm": 0.053999639017999444, "learning_rate": 1.7476192225273e-05, "loss": 0.8385, "step": 234450 }, { "epoch": 4.11629417651293, "grad_norm": 0.04945080919736853, "learning_rate": 1.747021024473548e-05, "loss": 0.8456, "step": 234460 }, { "epoch": 4.116469741392931, "grad_norm": 0.041614555424759656, "learning_rate": 1.7464229322536378e-05, "loss": 0.8358, "step": 234470 }, { "epoch": 4.116645306272933, "grad_norm": 0.05125609536613273, "learning_rate": 1.7458249458751498e-05, "loss": 0.8417, "step": 234480 }, { "epoch": 4.116820871152934, "grad_norm": 0.04255487748945043, "learning_rate": 1.745227065345664e-05, "loss": 0.84, "step": 234490 }, { "epoch": 4.116996436032936, "grad_norm": 0.054202744389663736, "learning_rate": 1.7446292906727497e-05, "loss": 0.8407, "step": 234500 }, { "epoch": 4.117172000912937, "grad_norm": 0.05288665338138002, "learning_rate": 1.7440316218639838e-05, "loss": 0.8402, "step": 234510 }, { "epoch": 4.117347565792938, "grad_norm": 0.06038664446387986, "learning_rate": 1.7434340589269403e-05, "loss": 0.8411, "step": 234520 }, { "epoch": 4.11752313067294, "grad_norm": 0.04814584081758831, "learning_rate": 1.742836601869187e-05, "loss": 0.8405, "step": 234530 }, { "epoch": 4.117698695552941, "grad_norm": 0.05396248182411174, "learning_rate": 1.742239250698296e-05, "loss": 0.8362, "step": 234540 }, { "epoch": 4.117874260432943, "grad_norm": 0.05382661332812177, "learning_rate": 1.7416420054218345e-05, "loss": 0.8417, "step": 234550 }, { "epoch": 4.118049825312944, "grad_norm": 0.0414453055648091, "learning_rate": 1.7410448660473737e-05, "loss": 0.8406, "step": 234560 }, { "epoch": 4.118225390192946, "grad_norm": 0.04987369774080971, "learning_rate": 1.7404478325824737e-05, "loss": 0.8343, "step": 234570 }, { "epoch": 4.118400955072947, "grad_norm": 0.04399735346327648, "learning_rate": 1.739850905034702e-05, "loss": 0.8455, "step": 234580 }, { "epoch": 4.118576519952948, "grad_norm": 0.056959679684240705, "learning_rate": 1.7392540834116236e-05, "loss": 0.8397, "step": 234590 }, { "epoch": 4.11875208483295, "grad_norm": 0.07239583776098774, "learning_rate": 1.738657367720796e-05, "loss": 0.8436, "step": 234600 }, { "epoch": 4.118927649712951, "grad_norm": 0.053675120037024224, "learning_rate": 1.7380607579697826e-05, "loss": 0.8407, "step": 234610 }, { "epoch": 4.119103214592953, "grad_norm": 0.05311120623634274, "learning_rate": 1.737464254166142e-05, "loss": 0.8304, "step": 234620 }, { "epoch": 4.119278779472954, "grad_norm": 0.054967653288015964, "learning_rate": 1.7368678563174336e-05, "loss": 0.8389, "step": 234630 }, { "epoch": 4.119454344352955, "grad_norm": 0.04794764088383234, "learning_rate": 1.73627156443121e-05, "loss": 0.8398, "step": 234640 }, { "epoch": 4.119629909232957, "grad_norm": 0.05564567894472691, "learning_rate": 1.7356753785150293e-05, "loss": 0.8414, "step": 234650 }, { "epoch": 4.119805474112958, "grad_norm": 0.04879054866690539, "learning_rate": 1.735079298576446e-05, "loss": 0.8321, "step": 234660 }, { "epoch": 4.11998103899296, "grad_norm": 0.05191277534873428, "learning_rate": 1.734483324623009e-05, "loss": 0.8422, "step": 234670 }, { "epoch": 4.120156603872961, "grad_norm": 0.05310921042825396, "learning_rate": 1.7338874566622714e-05, "loss": 0.8411, "step": 234680 }, { "epoch": 4.120332168752963, "grad_norm": 0.06275827900395828, "learning_rate": 1.733291694701786e-05, "loss": 0.8383, "step": 234690 }, { "epoch": 4.120507733632964, "grad_norm": 0.06344737019212822, "learning_rate": 1.7326960387490952e-05, "loss": 0.8411, "step": 234700 }, { "epoch": 4.120683298512965, "grad_norm": 0.09488362145822753, "learning_rate": 1.73210048881175e-05, "loss": 0.8417, "step": 234710 }, { "epoch": 4.120858863392967, "grad_norm": 0.04987990973960136, "learning_rate": 1.731505044897295e-05, "loss": 0.841, "step": 234720 }, { "epoch": 4.121034428272968, "grad_norm": 0.05084155618949037, "learning_rate": 1.7309097070132778e-05, "loss": 0.8412, "step": 234730 }, { "epoch": 4.12120999315297, "grad_norm": 0.04455123554308123, "learning_rate": 1.7303144751672346e-05, "loss": 0.8396, "step": 234740 }, { "epoch": 4.121385558032971, "grad_norm": 0.04346811713998301, "learning_rate": 1.729719349366712e-05, "loss": 0.8378, "step": 234750 }, { "epoch": 4.121561122912972, "grad_norm": 0.05001197070497517, "learning_rate": 1.729124329619252e-05, "loss": 0.8478, "step": 234760 }, { "epoch": 4.121736687792974, "grad_norm": 0.05299884212328996, "learning_rate": 1.7285294159323888e-05, "loss": 0.8354, "step": 234770 }, { "epoch": 4.121912252672975, "grad_norm": 0.057647671594959635, "learning_rate": 1.7279346083136614e-05, "loss": 0.8432, "step": 234780 }, { "epoch": 4.122087817552977, "grad_norm": 0.06195874734413342, "learning_rate": 1.7273399067706078e-05, "loss": 0.8327, "step": 234790 }, { "epoch": 4.122263382432978, "grad_norm": 0.05750256029646597, "learning_rate": 1.7267453113107642e-05, "loss": 0.8387, "step": 234800 }, { "epoch": 4.12243894731298, "grad_norm": 0.05250991722348684, "learning_rate": 1.7261508219416603e-05, "loss": 0.8383, "step": 234810 }, { "epoch": 4.122614512192981, "grad_norm": 0.05515349795894327, "learning_rate": 1.7255564386708294e-05, "loss": 0.8414, "step": 234820 }, { "epoch": 4.122790077072982, "grad_norm": 0.05827120208085201, "learning_rate": 1.7249621615058063e-05, "loss": 0.8433, "step": 234830 }, { "epoch": 4.122965641952984, "grad_norm": 0.0464705873558169, "learning_rate": 1.7243679904541153e-05, "loss": 0.845, "step": 234840 }, { "epoch": 4.123141206832985, "grad_norm": 0.04497342180548477, "learning_rate": 1.723773925523287e-05, "loss": 0.8418, "step": 234850 }, { "epoch": 4.123316771712987, "grad_norm": 0.05152269147502093, "learning_rate": 1.7231799667208507e-05, "loss": 0.837, "step": 234860 }, { "epoch": 4.123492336592988, "grad_norm": 0.0546796501331538, "learning_rate": 1.722586114054328e-05, "loss": 0.8463, "step": 234870 }, { "epoch": 4.12366790147299, "grad_norm": 0.060523403138548725, "learning_rate": 1.7219923675312437e-05, "loss": 0.8388, "step": 234880 }, { "epoch": 4.123843466352991, "grad_norm": 0.0674122183476116, "learning_rate": 1.7213987271591224e-05, "loss": 0.8281, "step": 234890 }, { "epoch": 4.124019031232992, "grad_norm": 0.0697767245929838, "learning_rate": 1.720805192945487e-05, "loss": 0.8383, "step": 234900 }, { "epoch": 4.124194596112994, "grad_norm": 0.05267889483543525, "learning_rate": 1.7202117648978527e-05, "loss": 0.8423, "step": 234910 }, { "epoch": 4.124370160992995, "grad_norm": 0.04500525839719952, "learning_rate": 1.719618443023742e-05, "loss": 0.8419, "step": 234920 }, { "epoch": 4.124545725872997, "grad_norm": 0.054034297720997285, "learning_rate": 1.7190252273306747e-05, "loss": 0.8342, "step": 234930 }, { "epoch": 4.124721290752998, "grad_norm": 0.048646665372742044, "learning_rate": 1.7184321178261612e-05, "loss": 0.8389, "step": 234940 }, { "epoch": 4.124896855632999, "grad_norm": 0.057179047025789194, "learning_rate": 1.7178391145177185e-05, "loss": 0.8366, "step": 234950 }, { "epoch": 4.125072420513001, "grad_norm": 0.050954832213349434, "learning_rate": 1.7172462174128613e-05, "loss": 0.8401, "step": 234960 }, { "epoch": 4.125247985393002, "grad_norm": 0.06950686316195245, "learning_rate": 1.716653426519103e-05, "loss": 0.8362, "step": 234970 }, { "epoch": 4.125423550273004, "grad_norm": 0.04164361234012297, "learning_rate": 1.7160607418439506e-05, "loss": 0.8362, "step": 234980 }, { "epoch": 4.125599115153005, "grad_norm": 0.04969816147983404, "learning_rate": 1.7154681633949146e-05, "loss": 0.8368, "step": 234990 }, { "epoch": 4.125774680033007, "grad_norm": 0.05559943769587121, "learning_rate": 1.7148756911795064e-05, "loss": 0.8418, "step": 235000 }, { "epoch": 4.125950244913008, "grad_norm": 0.04573122592216604, "learning_rate": 1.7142833252052283e-05, "loss": 0.8417, "step": 235010 }, { "epoch": 4.126125809793009, "grad_norm": 0.07315233313308833, "learning_rate": 1.7136910654795875e-05, "loss": 0.8358, "step": 235020 }, { "epoch": 4.126301374673011, "grad_norm": 0.07279770267905203, "learning_rate": 1.7130989120100902e-05, "loss": 0.8384, "step": 235030 }, { "epoch": 4.126476939553012, "grad_norm": 0.05722818153628665, "learning_rate": 1.7125068648042352e-05, "loss": 0.8441, "step": 235040 }, { "epoch": 4.126652504433014, "grad_norm": 0.04724630685113138, "learning_rate": 1.7119149238695254e-05, "loss": 0.8389, "step": 235050 }, { "epoch": 4.126828069313015, "grad_norm": 0.046272910811731946, "learning_rate": 1.711323089213461e-05, "loss": 0.8379, "step": 235060 }, { "epoch": 4.127003634193016, "grad_norm": 0.0453612340908399, "learning_rate": 1.7107313608435425e-05, "loss": 0.8408, "step": 235070 }, { "epoch": 4.127179199073018, "grad_norm": 0.0689793439772486, "learning_rate": 1.7101397387672635e-05, "loss": 0.8446, "step": 235080 }, { "epoch": 4.127354763953019, "grad_norm": 0.06240268830373271, "learning_rate": 1.7095482229921225e-05, "loss": 0.845, "step": 235090 }, { "epoch": 4.1275303288330205, "grad_norm": 0.05276254878899509, "learning_rate": 1.7089568135256155e-05, "loss": 0.8335, "step": 235100 }, { "epoch": 4.1277058937130215, "grad_norm": 0.05108152621859422, "learning_rate": 1.7083655103752318e-05, "loss": 0.8384, "step": 235110 }, { "epoch": 4.1278814585930235, "grad_norm": 0.04965126181644683, "learning_rate": 1.707774313548465e-05, "loss": 0.8445, "step": 235120 }, { "epoch": 4.1280570234730245, "grad_norm": 0.06289319520830705, "learning_rate": 1.707183223052806e-05, "loss": 0.8391, "step": 235130 }, { "epoch": 4.1282325883530255, "grad_norm": 0.04750486372806142, "learning_rate": 1.706592238895746e-05, "loss": 0.8345, "step": 235140 }, { "epoch": 4.1284081532330275, "grad_norm": 0.04228958733517577, "learning_rate": 1.7060013610847695e-05, "loss": 0.8398, "step": 235150 }, { "epoch": 4.1285837181130285, "grad_norm": 0.06886201901120413, "learning_rate": 1.7054105896273632e-05, "loss": 0.8443, "step": 235160 }, { "epoch": 4.12875928299303, "grad_norm": 0.04883282222432168, "learning_rate": 1.7048199245310165e-05, "loss": 0.8331, "step": 235170 }, { "epoch": 4.128934847873031, "grad_norm": 0.04527842336934617, "learning_rate": 1.7042293658032082e-05, "loss": 0.8372, "step": 235180 }, { "epoch": 4.1291104127530325, "grad_norm": 0.050596089523548944, "learning_rate": 1.703638913451423e-05, "loss": 0.844, "step": 235190 }, { "epoch": 4.129285977633034, "grad_norm": 0.05700427590244759, "learning_rate": 1.703048567483144e-05, "loss": 0.8374, "step": 235200 }, { "epoch": 4.129461542513035, "grad_norm": 0.04847299816745394, "learning_rate": 1.7024583279058467e-05, "loss": 0.8429, "step": 235210 }, { "epoch": 4.129637107393037, "grad_norm": 0.05358846185542368, "learning_rate": 1.7018681947270116e-05, "loss": 0.8437, "step": 235220 }, { "epoch": 4.129812672273038, "grad_norm": 0.052335119158798356, "learning_rate": 1.7012781679541153e-05, "loss": 0.85, "step": 235230 }, { "epoch": 4.12998823715304, "grad_norm": 0.04650760144617285, "learning_rate": 1.700688247594638e-05, "loss": 0.8367, "step": 235240 }, { "epoch": 4.130163802033041, "grad_norm": 0.05134971733082181, "learning_rate": 1.7000984336560473e-05, "loss": 0.8381, "step": 235250 }, { "epoch": 4.130339366913042, "grad_norm": 0.05810557649206399, "learning_rate": 1.699508726145819e-05, "loss": 0.849, "step": 235260 }, { "epoch": 4.130514931793044, "grad_norm": 0.06055077128009558, "learning_rate": 1.698919125071428e-05, "loss": 0.8313, "step": 235270 }, { "epoch": 4.130690496673045, "grad_norm": 0.06918774925685399, "learning_rate": 1.6983296304403387e-05, "loss": 0.8382, "step": 235280 }, { "epoch": 4.130866061553047, "grad_norm": 0.06615788086079645, "learning_rate": 1.6977402422600235e-05, "loss": 0.8415, "step": 235290 }, { "epoch": 4.131041626433048, "grad_norm": 0.046203195242998076, "learning_rate": 1.6971509605379506e-05, "loss": 0.8371, "step": 235300 }, { "epoch": 4.131217191313049, "grad_norm": 0.04651898132111819, "learning_rate": 1.6965617852815867e-05, "loss": 0.8417, "step": 235310 }, { "epoch": 4.131392756193051, "grad_norm": 0.05890475580927434, "learning_rate": 1.6959727164983926e-05, "loss": 0.8458, "step": 235320 }, { "epoch": 4.131568321073052, "grad_norm": 0.041264092471387966, "learning_rate": 1.6953837541958363e-05, "loss": 0.8342, "step": 235330 }, { "epoch": 4.131743885953054, "grad_norm": 0.045375160281706836, "learning_rate": 1.694794898381379e-05, "loss": 0.8457, "step": 235340 }, { "epoch": 4.131919450833055, "grad_norm": 0.05925223592052818, "learning_rate": 1.6942061490624802e-05, "loss": 0.8424, "step": 235350 }, { "epoch": 4.132095015713057, "grad_norm": 0.04854567134132715, "learning_rate": 1.6936175062466004e-05, "loss": 0.8349, "step": 235360 }, { "epoch": 4.132270580593058, "grad_norm": 0.09361928160062316, "learning_rate": 1.6930289699412e-05, "loss": 0.8429, "step": 235370 }, { "epoch": 4.132446145473059, "grad_norm": 0.04741882243031577, "learning_rate": 1.6924405401537318e-05, "loss": 0.8382, "step": 235380 }, { "epoch": 4.132621710353061, "grad_norm": 0.050010070683402484, "learning_rate": 1.6918522168916538e-05, "loss": 0.8469, "step": 235390 }, { "epoch": 4.132797275233062, "grad_norm": 0.05365008111230403, "learning_rate": 1.691264000162418e-05, "loss": 0.8426, "step": 235400 }, { "epoch": 4.132972840113064, "grad_norm": 0.049387743320496925, "learning_rate": 1.6906758899734823e-05, "loss": 0.8408, "step": 235410 }, { "epoch": 4.133148404993065, "grad_norm": 0.05324731514706297, "learning_rate": 1.690087886332292e-05, "loss": 0.8359, "step": 235420 }, { "epoch": 4.133323969873066, "grad_norm": 0.0675510109568414, "learning_rate": 1.6894999892462994e-05, "loss": 0.832, "step": 235430 }, { "epoch": 4.133499534753068, "grad_norm": 0.045571343731171034, "learning_rate": 1.6889121987229573e-05, "loss": 0.8489, "step": 235440 }, { "epoch": 4.133675099633069, "grad_norm": 0.059493831642811155, "learning_rate": 1.6883245147697068e-05, "loss": 0.8523, "step": 235450 }, { "epoch": 4.133850664513071, "grad_norm": 0.06869912102086842, "learning_rate": 1.6877369373939968e-05, "loss": 0.8445, "step": 235460 }, { "epoch": 4.134026229393072, "grad_norm": 0.04941765215386544, "learning_rate": 1.6871494666032712e-05, "loss": 0.8448, "step": 235470 }, { "epoch": 4.134201794273074, "grad_norm": 0.05441435699541412, "learning_rate": 1.686562102404977e-05, "loss": 0.8387, "step": 235480 }, { "epoch": 4.134377359153075, "grad_norm": 0.06000864308006045, "learning_rate": 1.685974844806551e-05, "loss": 0.8424, "step": 235490 }, { "epoch": 4.134552924033076, "grad_norm": 0.05493608513535751, "learning_rate": 1.6853876938154372e-05, "loss": 0.8383, "step": 235500 }, { "epoch": 4.134728488913078, "grad_norm": 0.04540740597754138, "learning_rate": 1.6848006494390754e-05, "loss": 0.8343, "step": 235510 }, { "epoch": 4.134904053793079, "grad_norm": 0.08109308998861921, "learning_rate": 1.684213711684901e-05, "loss": 0.84, "step": 235520 }, { "epoch": 4.135079618673081, "grad_norm": 0.052185776680238334, "learning_rate": 1.68362688056035e-05, "loss": 0.833, "step": 235530 }, { "epoch": 4.135255183553082, "grad_norm": 0.04118586517499772, "learning_rate": 1.683040156072863e-05, "loss": 0.8413, "step": 235540 }, { "epoch": 4.135430748433084, "grad_norm": 0.07381308690221731, "learning_rate": 1.682453538229867e-05, "loss": 0.8471, "step": 235550 }, { "epoch": 4.135606313313085, "grad_norm": 0.0454057346183786, "learning_rate": 1.6818670270387984e-05, "loss": 0.8342, "step": 235560 }, { "epoch": 4.135781878193086, "grad_norm": 0.05204877679000117, "learning_rate": 1.681280622507088e-05, "loss": 0.8439, "step": 235570 }, { "epoch": 4.135957443073088, "grad_norm": 0.06600689195419857, "learning_rate": 1.6806943246421675e-05, "loss": 0.8403, "step": 235580 }, { "epoch": 4.136133007953089, "grad_norm": 0.05843261857898632, "learning_rate": 1.6801081334514618e-05, "loss": 0.8453, "step": 235590 }, { "epoch": 4.136308572833091, "grad_norm": 0.061291363878105735, "learning_rate": 1.6795220489423988e-05, "loss": 0.8394, "step": 235600 }, { "epoch": 4.136484137713092, "grad_norm": 0.05330032101929443, "learning_rate": 1.6789360711224082e-05, "loss": 0.8233, "step": 235610 }, { "epoch": 4.136659702593093, "grad_norm": 0.046081767634715526, "learning_rate": 1.6783501999989092e-05, "loss": 0.8439, "step": 235620 }, { "epoch": 4.136835267473095, "grad_norm": 0.06624259233382424, "learning_rate": 1.6777644355793263e-05, "loss": 0.8378, "step": 235630 }, { "epoch": 4.137010832353096, "grad_norm": 0.0478726082989011, "learning_rate": 1.677178777871083e-05, "loss": 0.8332, "step": 235640 }, { "epoch": 4.137186397233098, "grad_norm": 0.0563228169572709, "learning_rate": 1.676593226881601e-05, "loss": 0.8384, "step": 235650 }, { "epoch": 4.137361962113099, "grad_norm": 0.05230080852919149, "learning_rate": 1.6760077826182946e-05, "loss": 0.8321, "step": 235660 }, { "epoch": 4.1375375269931, "grad_norm": 0.062399143360944376, "learning_rate": 1.6754224450885848e-05, "loss": 0.8401, "step": 235670 }, { "epoch": 4.137713091873102, "grad_norm": 0.052180105891082816, "learning_rate": 1.6748372142998888e-05, "loss": 0.8416, "step": 235680 }, { "epoch": 4.137888656753103, "grad_norm": 0.05376964345806113, "learning_rate": 1.674252090259618e-05, "loss": 0.844, "step": 235690 }, { "epoch": 4.138064221633105, "grad_norm": 0.04158725937805825, "learning_rate": 1.6736670729751867e-05, "loss": 0.8471, "step": 235700 }, { "epoch": 4.138239786513106, "grad_norm": 0.04870385641231815, "learning_rate": 1.6730821624540123e-05, "loss": 0.8376, "step": 235710 }, { "epoch": 4.138415351393108, "grad_norm": 0.050256387700856456, "learning_rate": 1.672497358703499e-05, "loss": 0.8368, "step": 235720 }, { "epoch": 4.138590916273109, "grad_norm": 0.04999031492596745, "learning_rate": 1.67191266173106e-05, "loss": 0.8288, "step": 235730 }, { "epoch": 4.13876648115311, "grad_norm": 0.04947243159349324, "learning_rate": 1.671328071544102e-05, "loss": 0.8453, "step": 235740 }, { "epoch": 4.138942046033112, "grad_norm": 0.04431970440815433, "learning_rate": 1.6707435881500355e-05, "loss": 0.8398, "step": 235750 }, { "epoch": 4.139117610913113, "grad_norm": 0.07136447270315502, "learning_rate": 1.6701592115562597e-05, "loss": 0.839, "step": 235760 }, { "epoch": 4.139293175793115, "grad_norm": 0.05132152009182859, "learning_rate": 1.669574941770184e-05, "loss": 0.8321, "step": 235770 }, { "epoch": 4.139468740673116, "grad_norm": 0.04273271768582594, "learning_rate": 1.6689907787992108e-05, "loss": 0.8383, "step": 235780 }, { "epoch": 4.139644305553118, "grad_norm": 0.04032692357559119, "learning_rate": 1.6684067226507386e-05, "loss": 0.8361, "step": 235790 }, { "epoch": 4.139819870433119, "grad_norm": 0.05513283079877782, "learning_rate": 1.6678227733321677e-05, "loss": 0.8287, "step": 235800 }, { "epoch": 4.13999543531312, "grad_norm": 0.04904884588816031, "learning_rate": 1.6672389308508993e-05, "loss": 0.839, "step": 235810 }, { "epoch": 4.140171000193122, "grad_norm": 0.06833366917736175, "learning_rate": 1.6666551952143322e-05, "loss": 0.8424, "step": 235820 }, { "epoch": 4.140346565073123, "grad_norm": 0.05396863601377881, "learning_rate": 1.6660715664298575e-05, "loss": 0.8405, "step": 235830 }, { "epoch": 4.1405221299531245, "grad_norm": 0.044545584588989426, "learning_rate": 1.6654880445048723e-05, "loss": 0.8405, "step": 235840 }, { "epoch": 4.1406976948331256, "grad_norm": 0.06215820668274176, "learning_rate": 1.6649046294467726e-05, "loss": 0.8458, "step": 235850 }, { "epoch": 4.140873259713127, "grad_norm": 0.05422308441147754, "learning_rate": 1.664321321262945e-05, "loss": 0.8249, "step": 235860 }, { "epoch": 4.1410488245931285, "grad_norm": 0.044219291078601404, "learning_rate": 1.6637381199607836e-05, "loss": 0.8371, "step": 235870 }, { "epoch": 4.1412243894731295, "grad_norm": 0.04648765215742854, "learning_rate": 1.6631550255476797e-05, "loss": 0.8396, "step": 235880 }, { "epoch": 4.1413999543531315, "grad_norm": 0.045275227925774444, "learning_rate": 1.6625720380310147e-05, "loss": 0.8415, "step": 235890 }, { "epoch": 4.1415755192331325, "grad_norm": 0.05714498450499077, "learning_rate": 1.66198915741818e-05, "loss": 0.8369, "step": 235900 }, { "epoch": 4.141751084113134, "grad_norm": 0.05166985608833274, "learning_rate": 1.66140638371656e-05, "loss": 0.8346, "step": 235910 }, { "epoch": 4.1419266489931355, "grad_norm": 0.04714350163504745, "learning_rate": 1.6608237169335407e-05, "loss": 0.8391, "step": 235920 }, { "epoch": 4.1421022138731365, "grad_norm": 0.04798245387275625, "learning_rate": 1.6602411570765e-05, "loss": 0.8332, "step": 235930 }, { "epoch": 4.142277778753138, "grad_norm": 0.07181919278847415, "learning_rate": 1.6596587041528207e-05, "loss": 0.8403, "step": 235940 }, { "epoch": 4.142453343633139, "grad_norm": 0.05482673155595358, "learning_rate": 1.6590763581698853e-05, "loss": 0.8406, "step": 235950 }, { "epoch": 4.142628908513141, "grad_norm": 0.0550404319581507, "learning_rate": 1.6584941191350682e-05, "loss": 0.8348, "step": 235960 }, { "epoch": 4.142804473393142, "grad_norm": 0.05260776581451765, "learning_rate": 1.657911987055749e-05, "loss": 0.8338, "step": 235970 }, { "epoch": 4.142980038273143, "grad_norm": 0.04671229219606289, "learning_rate": 1.6573299619393022e-05, "loss": 0.8376, "step": 235980 }, { "epoch": 4.143155603153145, "grad_norm": 0.0524820160021772, "learning_rate": 1.6567480437931052e-05, "loss": 0.8346, "step": 235990 }, { "epoch": 4.143331168033146, "grad_norm": 0.05267774162636728, "learning_rate": 1.656166232624526e-05, "loss": 0.8404, "step": 236000 }, { "epoch": 4.143506732913148, "grad_norm": 0.04734261012256279, "learning_rate": 1.6555845284409393e-05, "loss": 0.8363, "step": 236010 }, { "epoch": 4.143682297793149, "grad_norm": 0.05093617439921944, "learning_rate": 1.6550029312497178e-05, "loss": 0.8482, "step": 236020 }, { "epoch": 4.143857862673151, "grad_norm": 0.07323958456687325, "learning_rate": 1.654421441058224e-05, "loss": 0.8428, "step": 236030 }, { "epoch": 4.144033427553152, "grad_norm": 0.05239952731467768, "learning_rate": 1.6538400578738305e-05, "loss": 0.8336, "step": 236040 }, { "epoch": 4.144208992433153, "grad_norm": 0.049296880847542975, "learning_rate": 1.653258781703904e-05, "loss": 0.8371, "step": 236050 }, { "epoch": 4.144384557313155, "grad_norm": 0.05769792878830423, "learning_rate": 1.6526776125558045e-05, "loss": 0.835, "step": 236060 }, { "epoch": 4.144560122193156, "grad_norm": 0.04116416752032641, "learning_rate": 1.6520965504368995e-05, "loss": 0.8462, "step": 236070 }, { "epoch": 4.144735687073158, "grad_norm": 0.05289467534712351, "learning_rate": 1.6515155953545498e-05, "loss": 0.8375, "step": 236080 }, { "epoch": 4.144911251953159, "grad_norm": 0.05660666153263503, "learning_rate": 1.6509347473161193e-05, "loss": 0.8414, "step": 236090 }, { "epoch": 4.14508681683316, "grad_norm": 0.054083937621200597, "learning_rate": 1.6503540063289626e-05, "loss": 0.8474, "step": 236100 }, { "epoch": 4.145262381713162, "grad_norm": 0.060056138612166986, "learning_rate": 1.6497733724004396e-05, "loss": 0.8354, "step": 236110 }, { "epoch": 4.145437946593163, "grad_norm": 0.04230713840816596, "learning_rate": 1.6491928455379102e-05, "loss": 0.8392, "step": 236120 }, { "epoch": 4.145613511473165, "grad_norm": 0.04511979245407741, "learning_rate": 1.6486124257487242e-05, "loss": 0.8503, "step": 236130 }, { "epoch": 4.145789076353166, "grad_norm": 0.07202310892109016, "learning_rate": 1.648032113040238e-05, "loss": 0.8481, "step": 236140 }, { "epoch": 4.145964641233168, "grad_norm": 0.07178529382405721, "learning_rate": 1.6474519074198058e-05, "loss": 0.8462, "step": 236150 }, { "epoch": 4.146140206113169, "grad_norm": 0.05183880292308848, "learning_rate": 1.6468718088947793e-05, "loss": 0.8415, "step": 236160 }, { "epoch": 4.14631577099317, "grad_norm": 0.046965253943769686, "learning_rate": 1.646291817472506e-05, "loss": 0.844, "step": 236170 }, { "epoch": 4.146491335873172, "grad_norm": 0.04388312515821237, "learning_rate": 1.645711933160334e-05, "loss": 0.8382, "step": 236180 }, { "epoch": 4.146666900753173, "grad_norm": 0.0466001946899459, "learning_rate": 1.6451321559656153e-05, "loss": 0.8459, "step": 236190 }, { "epoch": 4.146842465633175, "grad_norm": 0.06064388529568784, "learning_rate": 1.64455248589569e-05, "loss": 0.8363, "step": 236200 }, { "epoch": 4.147018030513176, "grad_norm": 0.055734733046907695, "learning_rate": 1.643972922957905e-05, "loss": 0.8412, "step": 236210 }, { "epoch": 4.147193595393178, "grad_norm": 0.04862710298526778, "learning_rate": 1.6433934671596068e-05, "loss": 0.8349, "step": 236220 }, { "epoch": 4.147369160273179, "grad_norm": 0.046437366471734784, "learning_rate": 1.642814118508131e-05, "loss": 0.8438, "step": 236230 }, { "epoch": 4.14754472515318, "grad_norm": 0.05732199042906949, "learning_rate": 1.642234877010822e-05, "loss": 0.8436, "step": 236240 }, { "epoch": 4.147720290033182, "grad_norm": 0.054608412367273075, "learning_rate": 1.6416557426750168e-05, "loss": 0.8386, "step": 236250 }, { "epoch": 4.147895854913183, "grad_norm": 0.04967602616705417, "learning_rate": 1.6410767155080574e-05, "loss": 0.838, "step": 236260 }, { "epoch": 4.148071419793185, "grad_norm": 0.04950897904272935, "learning_rate": 1.6404977955172742e-05, "loss": 0.8427, "step": 236270 }, { "epoch": 4.148246984673186, "grad_norm": 0.051482928242632404, "learning_rate": 1.6399189827100057e-05, "loss": 0.8308, "step": 236280 }, { "epoch": 4.148422549553187, "grad_norm": 0.16924808896800042, "learning_rate": 1.6393402770935867e-05, "loss": 0.8383, "step": 236290 }, { "epoch": 4.148598114433189, "grad_norm": 0.053831059090021546, "learning_rate": 1.6387616786753464e-05, "loss": 0.8375, "step": 236300 }, { "epoch": 4.14877367931319, "grad_norm": 0.06887628203703657, "learning_rate": 1.6381831874626175e-05, "loss": 0.8444, "step": 236310 }, { "epoch": 4.148949244193192, "grad_norm": 0.04751683187422008, "learning_rate": 1.637604803462728e-05, "loss": 0.8438, "step": 236320 }, { "epoch": 4.149124809073193, "grad_norm": 0.06049073846120699, "learning_rate": 1.637026526683011e-05, "loss": 0.8417, "step": 236330 }, { "epoch": 4.149300373953194, "grad_norm": 0.04162189838705405, "learning_rate": 1.6364483571307872e-05, "loss": 0.8424, "step": 236340 }, { "epoch": 4.149475938833196, "grad_norm": 0.04583285658284482, "learning_rate": 1.6358702948133846e-05, "loss": 0.8415, "step": 236350 }, { "epoch": 4.149651503713197, "grad_norm": 0.045116540655720155, "learning_rate": 1.6352923397381293e-05, "loss": 0.8455, "step": 236360 }, { "epoch": 4.149827068593199, "grad_norm": 0.04454238913467225, "learning_rate": 1.6347144919123405e-05, "loss": 0.8365, "step": 236370 }, { "epoch": 4.1500026334732, "grad_norm": 0.04998332115527799, "learning_rate": 1.634136751343341e-05, "loss": 0.8394, "step": 236380 }, { "epoch": 4.150178198353202, "grad_norm": 0.0482208698769195, "learning_rate": 1.6335591180384545e-05, "loss": 0.8408, "step": 236390 }, { "epoch": 4.150353763233203, "grad_norm": 0.05935776303785838, "learning_rate": 1.6329815920049947e-05, "loss": 0.8387, "step": 236400 }, { "epoch": 4.150529328113204, "grad_norm": 0.046425093503225046, "learning_rate": 1.6324041732502797e-05, "loss": 0.8483, "step": 236410 }, { "epoch": 4.150704892993206, "grad_norm": 0.05477139883367371, "learning_rate": 1.6318268617816272e-05, "loss": 0.8451, "step": 236420 }, { "epoch": 4.150880457873207, "grad_norm": 0.04902494497142857, "learning_rate": 1.6312496576063525e-05, "loss": 0.843, "step": 236430 }, { "epoch": 4.151056022753209, "grad_norm": 0.046023690259519585, "learning_rate": 1.6306725607317662e-05, "loss": 0.8434, "step": 236440 }, { "epoch": 4.15123158763321, "grad_norm": 0.065943493164625, "learning_rate": 1.6300955711651816e-05, "loss": 0.8371, "step": 236450 }, { "epoch": 4.151407152513212, "grad_norm": 0.046992222846045074, "learning_rate": 1.6295186889139126e-05, "loss": 0.8429, "step": 236460 }, { "epoch": 4.151582717393213, "grad_norm": 0.04273502380859984, "learning_rate": 1.628941913985261e-05, "loss": 0.8401, "step": 236470 }, { "epoch": 4.151758282273214, "grad_norm": 0.05098981613691101, "learning_rate": 1.62836524638654e-05, "loss": 0.839, "step": 236480 }, { "epoch": 4.151933847153216, "grad_norm": 0.053892519393925004, "learning_rate": 1.6277886861250555e-05, "loss": 0.8395, "step": 236490 }, { "epoch": 4.152109412033217, "grad_norm": 0.04865832622274255, "learning_rate": 1.6272122332081127e-05, "loss": 0.8499, "step": 236500 }, { "epoch": 4.152284976913219, "grad_norm": 0.046905171768748734, "learning_rate": 1.6266358876430136e-05, "loss": 0.8474, "step": 236510 }, { "epoch": 4.15246054179322, "grad_norm": 0.050033143153869246, "learning_rate": 1.6260596494370614e-05, "loss": 0.8264, "step": 236520 }, { "epoch": 4.152636106673221, "grad_norm": 0.05698828588424838, "learning_rate": 1.62548351859756e-05, "loss": 0.8386, "step": 236530 }, { "epoch": 4.152811671553223, "grad_norm": 0.06563010639159209, "learning_rate": 1.6249074951318037e-05, "loss": 0.8339, "step": 236540 }, { "epoch": 4.152987236433224, "grad_norm": 0.06883984108177145, "learning_rate": 1.6243315790470942e-05, "loss": 0.8315, "step": 236550 }, { "epoch": 4.153162801313226, "grad_norm": 0.07728611662406826, "learning_rate": 1.62375577035073e-05, "loss": 0.8342, "step": 236560 }, { "epoch": 4.153338366193227, "grad_norm": 0.05486314943498038, "learning_rate": 1.6231800690500028e-05, "loss": 0.8397, "step": 236570 }, { "epoch": 4.1535139310732285, "grad_norm": 0.053826608600699516, "learning_rate": 1.6226044751522082e-05, "loss": 0.8345, "step": 236580 }, { "epoch": 4.15368949595323, "grad_norm": 0.11458636235548834, "learning_rate": 1.6220289886646396e-05, "loss": 0.841, "step": 236590 }, { "epoch": 4.153865060833231, "grad_norm": 0.053723628479885224, "learning_rate": 1.621453609594591e-05, "loss": 0.8443, "step": 236600 }, { "epoch": 4.1540406257132325, "grad_norm": 0.042047771751866865, "learning_rate": 1.6208783379493478e-05, "loss": 0.8403, "step": 236610 }, { "epoch": 4.1542161905932335, "grad_norm": 0.04806760671409712, "learning_rate": 1.6203031737362003e-05, "loss": 0.8341, "step": 236620 }, { "epoch": 4.1543917554732355, "grad_norm": 0.04580662825110685, "learning_rate": 1.6197281169624392e-05, "loss": 0.8477, "step": 236630 }, { "epoch": 4.1545673203532365, "grad_norm": 0.056300080288815016, "learning_rate": 1.619153167635346e-05, "loss": 0.8395, "step": 236640 }, { "epoch": 4.1547428852332375, "grad_norm": 0.05451166660784311, "learning_rate": 1.618578325762208e-05, "loss": 0.842, "step": 236650 }, { "epoch": 4.1549184501132395, "grad_norm": 0.05468351209334234, "learning_rate": 1.6180035913503082e-05, "loss": 0.8343, "step": 236660 }, { "epoch": 4.1550940149932405, "grad_norm": 0.05493109022947597, "learning_rate": 1.617428964406931e-05, "loss": 0.8359, "step": 236670 }, { "epoch": 4.155269579873242, "grad_norm": 0.05544214683071618, "learning_rate": 1.616854444939351e-05, "loss": 0.8307, "step": 236680 }, { "epoch": 4.1554451447532434, "grad_norm": 0.04459873923791056, "learning_rate": 1.6162800329548523e-05, "loss": 0.8407, "step": 236690 }, { "epoch": 4.155620709633245, "grad_norm": 0.04676636056660861, "learning_rate": 1.6157057284607126e-05, "loss": 0.8349, "step": 236700 }, { "epoch": 4.155796274513246, "grad_norm": 0.0589630738133615, "learning_rate": 1.6151315314642054e-05, "loss": 0.839, "step": 236710 }, { "epoch": 4.155971839393247, "grad_norm": 0.04612836240794525, "learning_rate": 1.614557441972607e-05, "loss": 0.8353, "step": 236720 }, { "epoch": 4.156147404273249, "grad_norm": 0.05478908569954184, "learning_rate": 1.6139834599931956e-05, "loss": 0.8369, "step": 236730 }, { "epoch": 4.15632296915325, "grad_norm": 0.04852720758487893, "learning_rate": 1.613409585533238e-05, "loss": 0.8279, "step": 236740 }, { "epoch": 4.156498534033252, "grad_norm": 0.07508255153893044, "learning_rate": 1.6128358186000065e-05, "loss": 0.834, "step": 236750 }, { "epoch": 4.156674098913253, "grad_norm": 0.040427576470996915, "learning_rate": 1.612262159200772e-05, "loss": 0.8435, "step": 236760 }, { "epoch": 4.156849663793254, "grad_norm": 0.044259710695286635, "learning_rate": 1.6116886073428046e-05, "loss": 0.844, "step": 236770 }, { "epoch": 4.157025228673256, "grad_norm": 0.0561574071613516, "learning_rate": 1.6111151630333674e-05, "loss": 0.8326, "step": 236780 }, { "epoch": 4.157200793553257, "grad_norm": 0.0460077361208732, "learning_rate": 1.610541826279729e-05, "loss": 0.8347, "step": 236790 }, { "epoch": 4.157376358433259, "grad_norm": 0.05606647031732127, "learning_rate": 1.609968597089153e-05, "loss": 0.8328, "step": 236800 }, { "epoch": 4.15755192331326, "grad_norm": 0.04590145262521, "learning_rate": 1.6093954754689015e-05, "loss": 0.8433, "step": 236810 }, { "epoch": 4.157727488193262, "grad_norm": 0.056963302655967475, "learning_rate": 1.6088224614262352e-05, "loss": 0.8299, "step": 236820 }, { "epoch": 4.157903053073263, "grad_norm": 0.06070149308009766, "learning_rate": 1.6082495549684166e-05, "loss": 0.8343, "step": 236830 }, { "epoch": 4.158078617953264, "grad_norm": 0.04691397168672473, "learning_rate": 1.6076767561027046e-05, "loss": 0.8357, "step": 236840 }, { "epoch": 4.158254182833266, "grad_norm": 0.054236265878029835, "learning_rate": 1.6071040648363536e-05, "loss": 0.8457, "step": 236850 }, { "epoch": 4.158429747713267, "grad_norm": 0.05124961035247888, "learning_rate": 1.6065314811766227e-05, "loss": 0.8415, "step": 236860 }, { "epoch": 4.158605312593269, "grad_norm": 0.07765043641203366, "learning_rate": 1.6059590051307676e-05, "loss": 0.8425, "step": 236870 }, { "epoch": 4.15878087747327, "grad_norm": 0.07064221166643189, "learning_rate": 1.6053866367060375e-05, "loss": 0.8418, "step": 236880 }, { "epoch": 4.158956442353272, "grad_norm": 0.06201970215749725, "learning_rate": 1.604814375909686e-05, "loss": 0.8392, "step": 236890 }, { "epoch": 4.159132007233273, "grad_norm": 0.06128914719511327, "learning_rate": 1.6042422227489683e-05, "loss": 0.8426, "step": 236900 }, { "epoch": 4.159307572113274, "grad_norm": 0.06959942277781714, "learning_rate": 1.603670177231127e-05, "loss": 0.8405, "step": 236910 }, { "epoch": 4.159483136993276, "grad_norm": 0.0476633003433106, "learning_rate": 1.6030982393634114e-05, "loss": 0.837, "step": 236920 }, { "epoch": 4.159658701873277, "grad_norm": 0.051979062890963275, "learning_rate": 1.602526409153071e-05, "loss": 0.8354, "step": 236930 }, { "epoch": 4.159834266753279, "grad_norm": 0.04492333516414296, "learning_rate": 1.601954686607352e-05, "loss": 0.8456, "step": 236940 }, { "epoch": 4.16000983163328, "grad_norm": 0.053928238339920415, "learning_rate": 1.601383071733492e-05, "loss": 0.8412, "step": 236950 }, { "epoch": 4.160185396513281, "grad_norm": 0.04417052735165964, "learning_rate": 1.600811564538739e-05, "loss": 0.8351, "step": 236960 }, { "epoch": 4.160360961393283, "grad_norm": 0.04890983767434255, "learning_rate": 1.6002401650303335e-05, "loss": 0.8378, "step": 236970 }, { "epoch": 4.160536526273284, "grad_norm": 0.05537320622626246, "learning_rate": 1.5996688732155126e-05, "loss": 0.8388, "step": 236980 }, { "epoch": 4.160712091153286, "grad_norm": 0.058094114262345994, "learning_rate": 1.5990976891015147e-05, "loss": 0.8354, "step": 236990 }, { "epoch": 4.160887656033287, "grad_norm": 0.04848185044570661, "learning_rate": 1.59852661269558e-05, "loss": 0.8426, "step": 237000 }, { "epoch": 4.161063220913288, "grad_norm": 0.06291936243096229, "learning_rate": 1.5979556440049437e-05, "loss": 0.8354, "step": 237010 }, { "epoch": 4.16123878579329, "grad_norm": 0.044586652081064264, "learning_rate": 1.597384783036836e-05, "loss": 0.8355, "step": 237020 }, { "epoch": 4.161414350673291, "grad_norm": 0.061842581919870195, "learning_rate": 1.5968140297984934e-05, "loss": 0.8352, "step": 237030 }, { "epoch": 4.161589915553293, "grad_norm": 0.054080246400189894, "learning_rate": 1.5962433842971497e-05, "loss": 0.8367, "step": 237040 }, { "epoch": 4.161765480433294, "grad_norm": 0.04854651497868882, "learning_rate": 1.595672846540029e-05, "loss": 0.8399, "step": 237050 }, { "epoch": 4.161941045313296, "grad_norm": 0.04830017186921659, "learning_rate": 1.5951024165343633e-05, "loss": 0.8379, "step": 237060 }, { "epoch": 4.162116610193297, "grad_norm": 0.06103574203836695, "learning_rate": 1.5945320942873817e-05, "loss": 0.8345, "step": 237070 }, { "epoch": 4.162292175073298, "grad_norm": 0.051547795610638596, "learning_rate": 1.593961879806307e-05, "loss": 0.8472, "step": 237080 }, { "epoch": 4.1624677399533, "grad_norm": 0.04719569858024964, "learning_rate": 1.593391773098365e-05, "loss": 0.8416, "step": 237090 }, { "epoch": 4.162643304833301, "grad_norm": 0.0472460152102647, "learning_rate": 1.592821774170779e-05, "loss": 0.8415, "step": 237100 }, { "epoch": 4.162818869713303, "grad_norm": 0.04805976251699496, "learning_rate": 1.592251883030775e-05, "loss": 0.8341, "step": 237110 }, { "epoch": 4.162994434593304, "grad_norm": 0.06615494413622988, "learning_rate": 1.591682099685567e-05, "loss": 0.8338, "step": 237120 }, { "epoch": 4.163169999473306, "grad_norm": 0.06304057210236624, "learning_rate": 1.591112424142378e-05, "loss": 0.8427, "step": 237130 }, { "epoch": 4.163345564353307, "grad_norm": 0.056519530910250415, "learning_rate": 1.5905428564084273e-05, "loss": 0.8334, "step": 237140 }, { "epoch": 4.163521129233308, "grad_norm": 0.057614687349324685, "learning_rate": 1.589973396490926e-05, "loss": 0.838, "step": 237150 }, { "epoch": 4.16369669411331, "grad_norm": 0.058331300911747735, "learning_rate": 1.5894040443970947e-05, "loss": 0.8403, "step": 237160 }, { "epoch": 4.163872258993311, "grad_norm": 0.05596665658772206, "learning_rate": 1.5888348001341442e-05, "loss": 0.8401, "step": 237170 }, { "epoch": 4.164047823873313, "grad_norm": 0.05631186987269469, "learning_rate": 1.5882656637092907e-05, "loss": 0.8389, "step": 237180 }, { "epoch": 4.164223388753314, "grad_norm": 0.05183086383617692, "learning_rate": 1.5876966351297395e-05, "loss": 0.836, "step": 237190 }, { "epoch": 4.164398953633315, "grad_norm": 0.04839112153845961, "learning_rate": 1.5871277144027035e-05, "loss": 0.8399, "step": 237200 }, { "epoch": 4.164574518513317, "grad_norm": 0.05096228441292146, "learning_rate": 1.5865589015353925e-05, "loss": 0.8418, "step": 237210 }, { "epoch": 4.164750083393318, "grad_norm": 0.05070805929886248, "learning_rate": 1.585990196535011e-05, "loss": 0.8412, "step": 237220 }, { "epoch": 4.16492564827332, "grad_norm": 0.04771064365845065, "learning_rate": 1.5854215994087648e-05, "loss": 0.8457, "step": 237230 }, { "epoch": 4.165101213153321, "grad_norm": 0.05821420046596766, "learning_rate": 1.5848531101638615e-05, "loss": 0.8346, "step": 237240 }, { "epoch": 4.165276778033323, "grad_norm": 0.050380196459206904, "learning_rate": 1.584284728807498e-05, "loss": 0.8385, "step": 237250 }, { "epoch": 4.165452342913324, "grad_norm": 0.05986865887273319, "learning_rate": 1.583716455346879e-05, "loss": 0.8304, "step": 237260 }, { "epoch": 4.165627907793325, "grad_norm": 0.047667895467381816, "learning_rate": 1.583148289789205e-05, "loss": 0.8368, "step": 237270 }, { "epoch": 4.165803472673327, "grad_norm": 0.07328197298065893, "learning_rate": 1.582580232141677e-05, "loss": 0.8415, "step": 237280 }, { "epoch": 4.165979037553328, "grad_norm": 0.05816886738409154, "learning_rate": 1.5820122824114873e-05, "loss": 0.8406, "step": 237290 }, { "epoch": 4.16615460243333, "grad_norm": 0.0415941708577262, "learning_rate": 1.5814444406058345e-05, "loss": 0.8341, "step": 237300 }, { "epoch": 4.166330167313331, "grad_norm": 0.05268399529138067, "learning_rate": 1.5808767067319156e-05, "loss": 0.8412, "step": 237310 }, { "epoch": 4.166505732193332, "grad_norm": 0.07599920427770494, "learning_rate": 1.5803090807969194e-05, "loss": 0.8405, "step": 237320 }, { "epoch": 4.166681297073334, "grad_norm": 0.054540809654889416, "learning_rate": 1.5797415628080395e-05, "loss": 0.8363, "step": 237330 }, { "epoch": 4.166856861953335, "grad_norm": 0.061538662327750314, "learning_rate": 1.5791741527724673e-05, "loss": 0.835, "step": 237340 }, { "epoch": 4.1670324268333365, "grad_norm": 0.06493535824630037, "learning_rate": 1.578606850697393e-05, "loss": 0.8481, "step": 237350 }, { "epoch": 4.167207991713338, "grad_norm": 0.05139497308999755, "learning_rate": 1.5780396565900016e-05, "loss": 0.8409, "step": 237360 }, { "epoch": 4.1673835565933395, "grad_norm": 0.063509201220524, "learning_rate": 1.5774725704574804e-05, "loss": 0.837, "step": 237370 }, { "epoch": 4.1675591214733405, "grad_norm": 0.0626869586152941, "learning_rate": 1.576905592307018e-05, "loss": 0.8392, "step": 237380 }, { "epoch": 4.1677346863533415, "grad_norm": 0.048931749244859164, "learning_rate": 1.5763387221457926e-05, "loss": 0.8348, "step": 237390 }, { "epoch": 4.1679102512333435, "grad_norm": 0.047471507267144684, "learning_rate": 1.575771959980989e-05, "loss": 0.827, "step": 237400 }, { "epoch": 4.1680858161133445, "grad_norm": 0.04811257292690325, "learning_rate": 1.575205305819791e-05, "loss": 0.8505, "step": 237410 }, { "epoch": 4.168261380993346, "grad_norm": 0.05539784344065212, "learning_rate": 1.5746387596693722e-05, "loss": 0.8409, "step": 237420 }, { "epoch": 4.1684369458733475, "grad_norm": 0.05175957261514728, "learning_rate": 1.5740723215369148e-05, "loss": 0.8357, "step": 237430 }, { "epoch": 4.1686125107533485, "grad_norm": 0.04940206573455357, "learning_rate": 1.573505991429595e-05, "loss": 0.8494, "step": 237440 }, { "epoch": 4.16878807563335, "grad_norm": 0.0465056445409293, "learning_rate": 1.5729397693545912e-05, "loss": 0.832, "step": 237450 }, { "epoch": 4.168963640513351, "grad_norm": 0.05257347918348014, "learning_rate": 1.5723736553190728e-05, "loss": 0.8368, "step": 237460 }, { "epoch": 4.169139205393353, "grad_norm": 0.05267459279620543, "learning_rate": 1.5718076493302135e-05, "loss": 0.8383, "step": 237470 }, { "epoch": 4.169314770273354, "grad_norm": 0.058759991617948364, "learning_rate": 1.5712417513951883e-05, "loss": 0.8373, "step": 237480 }, { "epoch": 4.169490335153356, "grad_norm": 0.05212115295936904, "learning_rate": 1.5706759615211632e-05, "loss": 0.8403, "step": 237490 }, { "epoch": 4.169665900033357, "grad_norm": 0.046726293820642695, "learning_rate": 1.5701102797153068e-05, "loss": 0.8347, "step": 237500 }, { "epoch": 4.169841464913358, "grad_norm": 0.05068644019056657, "learning_rate": 1.569544705984789e-05, "loss": 0.8393, "step": 237510 }, { "epoch": 4.17001702979336, "grad_norm": 0.06302071799726015, "learning_rate": 1.5689792403367764e-05, "loss": 0.8406, "step": 237520 }, { "epoch": 4.170192594673361, "grad_norm": 0.052858039205676666, "learning_rate": 1.5684138827784295e-05, "loss": 0.8355, "step": 237530 }, { "epoch": 4.170368159553363, "grad_norm": 0.07099572686054266, "learning_rate": 1.5678486333169135e-05, "loss": 0.8375, "step": 237540 }, { "epoch": 4.170543724433364, "grad_norm": 0.07666764848604417, "learning_rate": 1.5672834919593933e-05, "loss": 0.8447, "step": 237550 }, { "epoch": 4.170719289313365, "grad_norm": 0.0558475366459478, "learning_rate": 1.5667184587130236e-05, "loss": 0.8394, "step": 237560 }, { "epoch": 4.170894854193367, "grad_norm": 0.044683290110733326, "learning_rate": 1.566153533584967e-05, "loss": 0.8371, "step": 237570 }, { "epoch": 4.171070419073368, "grad_norm": 0.061291423840917986, "learning_rate": 1.5655887165823835e-05, "loss": 0.8376, "step": 237580 }, { "epoch": 4.17124598395337, "grad_norm": 0.09024125421251072, "learning_rate": 1.5650240077124234e-05, "loss": 0.8315, "step": 237590 }, { "epoch": 4.171421548833371, "grad_norm": 0.05236051773235789, "learning_rate": 1.5644594069822447e-05, "loss": 0.843, "step": 237600 }, { "epoch": 4.171597113713373, "grad_norm": 0.06834745671328477, "learning_rate": 1.5638949143990015e-05, "loss": 0.8419, "step": 237610 }, { "epoch": 4.171772678593374, "grad_norm": 0.057630257263934184, "learning_rate": 1.563330529969849e-05, "loss": 0.8386, "step": 237620 }, { "epoch": 4.171948243473375, "grad_norm": 0.05600033755598047, "learning_rate": 1.5627662537019314e-05, "loss": 0.8302, "step": 237630 }, { "epoch": 4.172123808353377, "grad_norm": 0.05917292644649656, "learning_rate": 1.5622020856024004e-05, "loss": 0.8416, "step": 237640 }, { "epoch": 4.172299373233378, "grad_norm": 0.05863402948987593, "learning_rate": 1.561638025678409e-05, "loss": 0.8485, "step": 237650 }, { "epoch": 4.17247493811338, "grad_norm": 0.05362645081248306, "learning_rate": 1.561074073937098e-05, "loss": 0.832, "step": 237660 }, { "epoch": 4.172650502993381, "grad_norm": 0.09079964418284694, "learning_rate": 1.560510230385615e-05, "loss": 0.8399, "step": 237670 }, { "epoch": 4.172826067873382, "grad_norm": 0.05378746473615646, "learning_rate": 1.559946495031103e-05, "loss": 0.8377, "step": 237680 }, { "epoch": 4.173001632753384, "grad_norm": 0.04365943146091295, "learning_rate": 1.559382867880708e-05, "loss": 0.8365, "step": 237690 }, { "epoch": 4.173177197633385, "grad_norm": 0.05543504337218526, "learning_rate": 1.5588193489415673e-05, "loss": 0.8356, "step": 237700 }, { "epoch": 4.173352762513387, "grad_norm": 0.047253386912819315, "learning_rate": 1.5582559382208215e-05, "loss": 0.834, "step": 237710 }, { "epoch": 4.173528327393388, "grad_norm": 0.04496636398156135, "learning_rate": 1.5576926357256137e-05, "loss": 0.8457, "step": 237720 }, { "epoch": 4.17370389227339, "grad_norm": 0.04860665429584265, "learning_rate": 1.5571294414630737e-05, "loss": 0.8394, "step": 237730 }, { "epoch": 4.173879457153391, "grad_norm": 0.051118056731141955, "learning_rate": 1.5565663554403414e-05, "loss": 0.8371, "step": 237740 }, { "epoch": 4.174055022033392, "grad_norm": 0.0518730428114326, "learning_rate": 1.5560033776645526e-05, "loss": 0.8332, "step": 237750 }, { "epoch": 4.174230586913394, "grad_norm": 0.048891551937543354, "learning_rate": 1.5554405081428372e-05, "loss": 0.8517, "step": 237760 }, { "epoch": 4.174406151793395, "grad_norm": 0.0391635186800861, "learning_rate": 1.5548777468823273e-05, "loss": 0.8437, "step": 237770 }, { "epoch": 4.174581716673397, "grad_norm": 0.0539687883273859, "learning_rate": 1.5543150938901538e-05, "loss": 0.8333, "step": 237780 }, { "epoch": 4.174757281553398, "grad_norm": 0.06151670818098311, "learning_rate": 1.553752549173449e-05, "loss": 0.8375, "step": 237790 }, { "epoch": 4.1749328464334, "grad_norm": 0.0549945364949087, "learning_rate": 1.5531901127393344e-05, "loss": 0.8487, "step": 237800 }, { "epoch": 4.175108411313401, "grad_norm": 0.05462192648645926, "learning_rate": 1.552627784594939e-05, "loss": 0.8411, "step": 237810 }, { "epoch": 4.175283976193402, "grad_norm": 0.04614672456225115, "learning_rate": 1.5520655647473898e-05, "loss": 0.8398, "step": 237820 }, { "epoch": 4.175459541073404, "grad_norm": 0.0734560295569145, "learning_rate": 1.5515034532038063e-05, "loss": 0.8398, "step": 237830 }, { "epoch": 4.175635105953405, "grad_norm": 0.065757225490644, "learning_rate": 1.550941449971312e-05, "loss": 0.8353, "step": 237840 }, { "epoch": 4.175810670833407, "grad_norm": 0.0662112726089326, "learning_rate": 1.5503795550570283e-05, "loss": 0.8296, "step": 237850 }, { "epoch": 4.175986235713408, "grad_norm": 0.04530865926134366, "learning_rate": 1.5498177684680757e-05, "loss": 0.8419, "step": 237860 }, { "epoch": 4.176161800593409, "grad_norm": 0.057765958238872026, "learning_rate": 1.5492560902115695e-05, "loss": 0.8353, "step": 237870 }, { "epoch": 4.176337365473411, "grad_norm": 0.04622158848437269, "learning_rate": 1.548694520294627e-05, "loss": 0.8357, "step": 237880 }, { "epoch": 4.176512930353412, "grad_norm": 0.05829067144167094, "learning_rate": 1.5481330587243655e-05, "loss": 0.8415, "step": 237890 }, { "epoch": 4.176688495233414, "grad_norm": 0.04575188789061447, "learning_rate": 1.547571705507895e-05, "loss": 0.8481, "step": 237900 }, { "epoch": 4.176864060113415, "grad_norm": 0.0456266517685205, "learning_rate": 1.5470104606523302e-05, "loss": 0.853, "step": 237910 }, { "epoch": 4.177039624993417, "grad_norm": 0.06799142098790438, "learning_rate": 1.546449324164785e-05, "loss": 0.8431, "step": 237920 }, { "epoch": 4.177215189873418, "grad_norm": 0.04801579457301374, "learning_rate": 1.545888296052363e-05, "loss": 0.8352, "step": 237930 }, { "epoch": 4.177390754753419, "grad_norm": 0.05298234524790623, "learning_rate": 1.545327376322175e-05, "loss": 0.8431, "step": 237940 }, { "epoch": 4.177566319633421, "grad_norm": 0.06028313896875768, "learning_rate": 1.5447665649813298e-05, "loss": 0.8468, "step": 237950 }, { "epoch": 4.177741884513422, "grad_norm": 0.08595093063632316, "learning_rate": 1.5442058620369344e-05, "loss": 0.8381, "step": 237960 }, { "epoch": 4.177917449393424, "grad_norm": 0.053457935053715724, "learning_rate": 1.543645267496087e-05, "loss": 0.8362, "step": 237970 }, { "epoch": 4.178093014273425, "grad_norm": 0.061695570243840855, "learning_rate": 1.543084781365895e-05, "loss": 0.8397, "step": 237980 }, { "epoch": 4.178268579153426, "grad_norm": 0.0588664177801311, "learning_rate": 1.5425244036534604e-05, "loss": 0.8417, "step": 237990 }, { "epoch": 4.178444144033428, "grad_norm": 0.059200242156609566, "learning_rate": 1.5419641343658786e-05, "loss": 0.8446, "step": 238000 }, { "epoch": 4.178619708913429, "grad_norm": 0.052073721332885944, "learning_rate": 1.541403973510253e-05, "loss": 0.8456, "step": 238010 }, { "epoch": 4.178795273793431, "grad_norm": 0.053001376720151476, "learning_rate": 1.5408439210936777e-05, "loss": 0.8428, "step": 238020 }, { "epoch": 4.178970838673432, "grad_norm": 0.05679707690995328, "learning_rate": 1.5402839771232532e-05, "loss": 0.8416, "step": 238030 }, { "epoch": 4.179146403553434, "grad_norm": 0.05432236250435067, "learning_rate": 1.5397241416060694e-05, "loss": 0.8488, "step": 238040 }, { "epoch": 4.179321968433435, "grad_norm": 0.05266965896415285, "learning_rate": 1.5391644145492214e-05, "loss": 0.8357, "step": 238050 }, { "epoch": 4.179497533313436, "grad_norm": 0.05344514242241278, "learning_rate": 1.5386047959598022e-05, "loss": 0.8425, "step": 238060 }, { "epoch": 4.179673098193438, "grad_norm": 0.053132226976448996, "learning_rate": 1.5380452858448998e-05, "loss": 0.8295, "step": 238070 }, { "epoch": 4.179848663073439, "grad_norm": 0.05410261630711112, "learning_rate": 1.5374858842116032e-05, "loss": 0.8379, "step": 238080 }, { "epoch": 4.1800242279534405, "grad_norm": 0.046992945798786843, "learning_rate": 1.536926591067005e-05, "loss": 0.8336, "step": 238090 }, { "epoch": 4.180199792833442, "grad_norm": 0.05624715944763452, "learning_rate": 1.536367406418185e-05, "loss": 0.8325, "step": 238100 }, { "epoch": 4.180375357713443, "grad_norm": 0.04987329351990984, "learning_rate": 1.5358083302722316e-05, "loss": 0.8387, "step": 238110 }, { "epoch": 4.1805509225934445, "grad_norm": 0.05591762346623597, "learning_rate": 1.5352493626362277e-05, "loss": 0.8313, "step": 238120 }, { "epoch": 4.1807264874734456, "grad_norm": 0.06251201261890421, "learning_rate": 1.5346905035172583e-05, "loss": 0.8356, "step": 238130 }, { "epoch": 4.1809020523534475, "grad_norm": 0.054127012745713673, "learning_rate": 1.5341317529223998e-05, "loss": 0.8434, "step": 238140 }, { "epoch": 4.1810776172334485, "grad_norm": 0.08327994273071626, "learning_rate": 1.5335731108587324e-05, "loss": 0.8448, "step": 238150 }, { "epoch": 4.18125318211345, "grad_norm": 0.056472425914585574, "learning_rate": 1.5330145773333394e-05, "loss": 0.8326, "step": 238160 }, { "epoch": 4.1814287469934515, "grad_norm": 0.08418211993514242, "learning_rate": 1.53245615235329e-05, "loss": 0.8335, "step": 238170 }, { "epoch": 4.1816043118734525, "grad_norm": 0.05103268395533753, "learning_rate": 1.5318978359256634e-05, "loss": 0.8345, "step": 238180 }, { "epoch": 4.181779876753454, "grad_norm": 0.05470066149117324, "learning_rate": 1.5313396280575327e-05, "loss": 0.8374, "step": 238190 }, { "epoch": 4.1819554416334555, "grad_norm": 0.057853097571775244, "learning_rate": 1.5307815287559744e-05, "loss": 0.8423, "step": 238200 }, { "epoch": 4.182131006513457, "grad_norm": 0.05315194395924367, "learning_rate": 1.530223538028054e-05, "loss": 0.8427, "step": 238210 }, { "epoch": 4.182306571393458, "grad_norm": 0.051635923520867315, "learning_rate": 1.529665655880844e-05, "loss": 0.8392, "step": 238220 }, { "epoch": 4.182482136273459, "grad_norm": 0.04540957503456549, "learning_rate": 1.5291078823214143e-05, "loss": 0.8445, "step": 238230 }, { "epoch": 4.182657701153461, "grad_norm": 0.04710786587161601, "learning_rate": 1.5285502173568283e-05, "loss": 0.8373, "step": 238240 }, { "epoch": 4.182833266033462, "grad_norm": 0.061006359790733745, "learning_rate": 1.5279926609941534e-05, "loss": 0.8386, "step": 238250 }, { "epoch": 4.183008830913464, "grad_norm": 0.057740611063476464, "learning_rate": 1.5274352132404568e-05, "loss": 0.85, "step": 238260 }, { "epoch": 4.183184395793465, "grad_norm": 0.054706696408251834, "learning_rate": 1.5268778741027968e-05, "loss": 0.8379, "step": 238270 }, { "epoch": 4.183359960673467, "grad_norm": 0.04964071688304258, "learning_rate": 1.5263206435882368e-05, "loss": 0.8412, "step": 238280 }, { "epoch": 4.183535525553468, "grad_norm": 0.04826037468763159, "learning_rate": 1.5257635217038374e-05, "loss": 0.8385, "step": 238290 }, { "epoch": 4.183711090433469, "grad_norm": 0.0517257219026431, "learning_rate": 1.5252065084566606e-05, "loss": 0.8426, "step": 238300 }, { "epoch": 4.183886655313471, "grad_norm": 0.046895920357134614, "learning_rate": 1.5246496038537578e-05, "loss": 0.8482, "step": 238310 }, { "epoch": 4.184062220193472, "grad_norm": 0.04931874082693212, "learning_rate": 1.5240928079021882e-05, "loss": 0.8401, "step": 238320 }, { "epoch": 4.184237785073474, "grad_norm": 0.048411562807018416, "learning_rate": 1.5235361206090077e-05, "loss": 0.8385, "step": 238330 }, { "epoch": 4.184413349953475, "grad_norm": 0.05111557248148754, "learning_rate": 1.5229795419812675e-05, "loss": 0.8319, "step": 238340 }, { "epoch": 4.184588914833476, "grad_norm": 0.047961582454427186, "learning_rate": 1.5224230720260194e-05, "loss": 0.8409, "step": 238350 }, { "epoch": 4.184764479713478, "grad_norm": 0.06208081091379162, "learning_rate": 1.521866710750314e-05, "loss": 0.8417, "step": 238360 }, { "epoch": 4.184940044593479, "grad_norm": 0.05203853841828185, "learning_rate": 1.521310458161205e-05, "loss": 0.8383, "step": 238370 }, { "epoch": 4.185115609473481, "grad_norm": 0.06759735805714266, "learning_rate": 1.5207543142657337e-05, "loss": 0.8416, "step": 238380 }, { "epoch": 4.185291174353482, "grad_norm": 0.05611924766516319, "learning_rate": 1.5201982790709495e-05, "loss": 0.837, "step": 238390 }, { "epoch": 4.185466739233484, "grad_norm": 0.0634211908584395, "learning_rate": 1.5196423525838996e-05, "loss": 0.8413, "step": 238400 }, { "epoch": 4.185642304113485, "grad_norm": 0.061739196937488326, "learning_rate": 1.5190865348116236e-05, "loss": 0.8359, "step": 238410 }, { "epoch": 4.185817868993486, "grad_norm": 0.06297684707926873, "learning_rate": 1.5185308257611653e-05, "loss": 0.8392, "step": 238420 }, { "epoch": 4.185993433873488, "grad_norm": 0.06281775791925857, "learning_rate": 1.5179752254395682e-05, "loss": 0.8399, "step": 238430 }, { "epoch": 4.186168998753489, "grad_norm": 0.042584733783266424, "learning_rate": 1.5174197338538684e-05, "loss": 0.8356, "step": 238440 }, { "epoch": 4.186344563633491, "grad_norm": 0.05077358454852594, "learning_rate": 1.5168643510111047e-05, "loss": 0.8457, "step": 238450 }, { "epoch": 4.186520128513492, "grad_norm": 0.05464533757142158, "learning_rate": 1.5163090769183152e-05, "loss": 0.8368, "step": 238460 }, { "epoch": 4.186695693393494, "grad_norm": 0.051602188373002636, "learning_rate": 1.5157539115825362e-05, "loss": 0.8408, "step": 238470 }, { "epoch": 4.186871258273495, "grad_norm": 0.04942576612999989, "learning_rate": 1.5151988550107982e-05, "loss": 0.8366, "step": 238480 }, { "epoch": 4.187046823153496, "grad_norm": 0.059212400463179556, "learning_rate": 1.5146439072101368e-05, "loss": 0.8334, "step": 238490 }, { "epoch": 4.187222388033498, "grad_norm": 0.04668449345944676, "learning_rate": 1.5140890681875841e-05, "loss": 0.8337, "step": 238500 }, { "epoch": 4.187397952913499, "grad_norm": 0.06368415265119988, "learning_rate": 1.513534337950166e-05, "loss": 0.8434, "step": 238510 }, { "epoch": 4.187573517793501, "grad_norm": 0.055549351584762596, "learning_rate": 1.512979716504913e-05, "loss": 0.8392, "step": 238520 }, { "epoch": 4.187749082673502, "grad_norm": 0.06573308595289327, "learning_rate": 1.5124252038588538e-05, "loss": 0.8401, "step": 238530 }, { "epoch": 4.187924647553503, "grad_norm": 0.05313618164342479, "learning_rate": 1.5118708000190144e-05, "loss": 0.8408, "step": 238540 }, { "epoch": 4.188100212433505, "grad_norm": 0.04443712513462778, "learning_rate": 1.5113165049924158e-05, "loss": 0.8396, "step": 238550 }, { "epoch": 4.188275777313506, "grad_norm": 0.08258350336148287, "learning_rate": 1.510762318786083e-05, "loss": 0.8437, "step": 238560 }, { "epoch": 4.188451342193508, "grad_norm": 0.0472689925518072, "learning_rate": 1.5102082414070404e-05, "loss": 0.8322, "step": 238570 }, { "epoch": 4.188626907073509, "grad_norm": 0.06684211566884615, "learning_rate": 1.5096542728623034e-05, "loss": 0.8409, "step": 238580 }, { "epoch": 4.18880247195351, "grad_norm": 0.046224175795940375, "learning_rate": 1.5091004131588934e-05, "loss": 0.8408, "step": 238590 }, { "epoch": 4.188978036833512, "grad_norm": 0.041109895002831624, "learning_rate": 1.5085466623038295e-05, "loss": 0.836, "step": 238600 }, { "epoch": 4.189153601713513, "grad_norm": 0.06351291250265909, "learning_rate": 1.5079930203041238e-05, "loss": 0.8408, "step": 238610 }, { "epoch": 4.189329166593515, "grad_norm": 0.05287773736716652, "learning_rate": 1.507439487166792e-05, "loss": 0.8354, "step": 238620 }, { "epoch": 4.189504731473516, "grad_norm": 0.051011986095251254, "learning_rate": 1.50688606289885e-05, "loss": 0.8302, "step": 238630 }, { "epoch": 4.189680296353518, "grad_norm": 0.051753119312690096, "learning_rate": 1.5063327475073102e-05, "loss": 0.8415, "step": 238640 }, { "epoch": 4.189855861233519, "grad_norm": 0.06020016985127818, "learning_rate": 1.5057795409991793e-05, "loss": 0.8327, "step": 238650 }, { "epoch": 4.19003142611352, "grad_norm": 0.04974031002722971, "learning_rate": 1.5052264433814686e-05, "loss": 0.8285, "step": 238660 }, { "epoch": 4.190206990993522, "grad_norm": 0.05084526883883265, "learning_rate": 1.5046734546611885e-05, "loss": 0.8363, "step": 238670 }, { "epoch": 4.190382555873523, "grad_norm": 0.049117185458053936, "learning_rate": 1.5041205748453413e-05, "loss": 0.8335, "step": 238680 }, { "epoch": 4.190558120753525, "grad_norm": 0.043426739301558714, "learning_rate": 1.5035678039409328e-05, "loss": 0.8304, "step": 238690 }, { "epoch": 4.190733685633526, "grad_norm": 0.04694061840316071, "learning_rate": 1.5030151419549701e-05, "loss": 0.8388, "step": 238700 }, { "epoch": 4.190909250513528, "grad_norm": 0.1151666515540686, "learning_rate": 1.502462588894451e-05, "loss": 0.8318, "step": 238710 }, { "epoch": 4.191084815393529, "grad_norm": 0.058596476309650744, "learning_rate": 1.5019101447663795e-05, "loss": 0.8353, "step": 238720 }, { "epoch": 4.19126038027353, "grad_norm": 0.06004457338589939, "learning_rate": 1.501357809577753e-05, "loss": 0.8392, "step": 238730 }, { "epoch": 4.191435945153532, "grad_norm": 0.05209936406949446, "learning_rate": 1.5008055833355738e-05, "loss": 0.8361, "step": 238740 }, { "epoch": 4.191611510033533, "grad_norm": 0.05118218317308762, "learning_rate": 1.500253466046834e-05, "loss": 0.8403, "step": 238750 }, { "epoch": 4.191787074913535, "grad_norm": 0.048328573583636396, "learning_rate": 1.4997014577185307e-05, "loss": 0.8377, "step": 238760 }, { "epoch": 4.191962639793536, "grad_norm": 0.047682331677322054, "learning_rate": 1.4991495583576598e-05, "loss": 0.8327, "step": 238770 }, { "epoch": 4.192138204673537, "grad_norm": 0.04861208611219997, "learning_rate": 1.4985977679712104e-05, "loss": 0.8408, "step": 238780 }, { "epoch": 4.192313769553539, "grad_norm": 0.05597187846579625, "learning_rate": 1.4980460865661764e-05, "loss": 0.8444, "step": 238790 }, { "epoch": 4.19248933443354, "grad_norm": 0.04655114850839296, "learning_rate": 1.497494514149546e-05, "loss": 0.8363, "step": 238800 }, { "epoch": 4.192664899313542, "grad_norm": 0.06118203707475685, "learning_rate": 1.4969430507283119e-05, "loss": 0.8384, "step": 238810 }, { "epoch": 4.192840464193543, "grad_norm": 0.08836443897200208, "learning_rate": 1.4963916963094557e-05, "loss": 0.8412, "step": 238820 }, { "epoch": 4.1930160290735445, "grad_norm": 0.04726217725189252, "learning_rate": 1.4958404508999643e-05, "loss": 0.8287, "step": 238830 }, { "epoch": 4.193191593953546, "grad_norm": 0.06154243401955423, "learning_rate": 1.4952893145068273e-05, "loss": 0.8375, "step": 238840 }, { "epoch": 4.193367158833547, "grad_norm": 0.05242484017947446, "learning_rate": 1.4947382871370206e-05, "loss": 0.8425, "step": 238850 }, { "epoch": 4.1935427237135485, "grad_norm": 0.05206156723495848, "learning_rate": 1.494187368797529e-05, "loss": 0.8291, "step": 238860 }, { "epoch": 4.19371828859355, "grad_norm": 0.058354860026841565, "learning_rate": 1.4936365594953356e-05, "loss": 0.8337, "step": 238870 }, { "epoch": 4.1938938534735515, "grad_norm": 0.040664504757747945, "learning_rate": 1.4930858592374141e-05, "loss": 0.838, "step": 238880 }, { "epoch": 4.1940694183535525, "grad_norm": 0.04431804307988354, "learning_rate": 1.4925352680307439e-05, "loss": 0.8435, "step": 238890 }, { "epoch": 4.1942449832335535, "grad_norm": 0.04157011012163535, "learning_rate": 1.4919847858823022e-05, "loss": 0.8422, "step": 238900 }, { "epoch": 4.1944205481135555, "grad_norm": 0.05467649292908454, "learning_rate": 1.491434412799064e-05, "loss": 0.8479, "step": 238910 }, { "epoch": 4.1945961129935565, "grad_norm": 0.07201497248590844, "learning_rate": 1.4908841487880001e-05, "loss": 0.8359, "step": 238920 }, { "epoch": 4.194771677873558, "grad_norm": 0.0454261056214724, "learning_rate": 1.4903339938560834e-05, "loss": 0.8334, "step": 238930 }, { "epoch": 4.1949472427535595, "grad_norm": 0.06381271636284838, "learning_rate": 1.4897839480102884e-05, "loss": 0.8385, "step": 238940 }, { "epoch": 4.195122807633561, "grad_norm": 0.05110857931001891, "learning_rate": 1.4892340112575775e-05, "loss": 0.8401, "step": 238950 }, { "epoch": 4.195298372513562, "grad_norm": 0.050699325390513, "learning_rate": 1.488684183604923e-05, "loss": 0.8483, "step": 238960 }, { "epoch": 4.1954739373935634, "grad_norm": 0.09506957458651517, "learning_rate": 1.4881344650592896e-05, "loss": 0.8462, "step": 238970 }, { "epoch": 4.195649502273565, "grad_norm": 0.04977119563810642, "learning_rate": 1.487584855627645e-05, "loss": 0.833, "step": 238980 }, { "epoch": 4.195825067153566, "grad_norm": 0.051269208447805394, "learning_rate": 1.4870353553169483e-05, "loss": 0.8355, "step": 238990 }, { "epoch": 4.196000632033568, "grad_norm": 0.05439355174028603, "learning_rate": 1.4864859641341655e-05, "loss": 0.8344, "step": 239000 }, { "epoch": 4.196176196913569, "grad_norm": 0.08036506277291794, "learning_rate": 1.4859366820862583e-05, "loss": 0.8394, "step": 239010 }, { "epoch": 4.19635176179357, "grad_norm": 0.050670245958855824, "learning_rate": 1.485387509180181e-05, "loss": 0.8449, "step": 239020 }, { "epoch": 4.196527326673572, "grad_norm": 0.060054114104270785, "learning_rate": 1.4848384454228956e-05, "loss": 0.8333, "step": 239030 }, { "epoch": 4.196702891553573, "grad_norm": 0.059192208346806455, "learning_rate": 1.4842894908213603e-05, "loss": 0.8325, "step": 239040 }, { "epoch": 4.196878456433575, "grad_norm": 0.055375145143179115, "learning_rate": 1.4837406453825258e-05, "loss": 0.8449, "step": 239050 }, { "epoch": 4.197054021313576, "grad_norm": 0.06777053761179155, "learning_rate": 1.4831919091133493e-05, "loss": 0.8514, "step": 239060 }, { "epoch": 4.197229586193578, "grad_norm": 0.05112911154956292, "learning_rate": 1.4826432820207827e-05, "loss": 0.8402, "step": 239070 }, { "epoch": 4.197405151073579, "grad_norm": 0.0532879719998436, "learning_rate": 1.4820947641117789e-05, "loss": 0.8382, "step": 239080 }, { "epoch": 4.19758071595358, "grad_norm": 0.06795008024360363, "learning_rate": 1.4815463553932848e-05, "loss": 0.8295, "step": 239090 }, { "epoch": 4.197756280833582, "grad_norm": 0.04642792416122419, "learning_rate": 1.480998055872249e-05, "loss": 0.8338, "step": 239100 }, { "epoch": 4.197931845713583, "grad_norm": 0.05441607737636711, "learning_rate": 1.480449865555622e-05, "loss": 0.8378, "step": 239110 }, { "epoch": 4.198107410593585, "grad_norm": 0.05229518661667282, "learning_rate": 1.4799017844503454e-05, "loss": 0.8402, "step": 239120 }, { "epoch": 4.198282975473586, "grad_norm": 0.057068406491435096, "learning_rate": 1.4793538125633655e-05, "loss": 0.8385, "step": 239130 }, { "epoch": 4.198458540353588, "grad_norm": 0.05615783779173883, "learning_rate": 1.4788059499016243e-05, "loss": 0.8362, "step": 239140 }, { "epoch": 4.198634105233589, "grad_norm": 0.05521186971599593, "learning_rate": 1.4782581964720663e-05, "loss": 0.8348, "step": 239150 }, { "epoch": 4.19880967011359, "grad_norm": 0.04982772194462838, "learning_rate": 1.4777105522816267e-05, "loss": 0.8399, "step": 239160 }, { "epoch": 4.198985234993592, "grad_norm": 0.05075504042842896, "learning_rate": 1.4771630173372477e-05, "loss": 0.8416, "step": 239170 }, { "epoch": 4.199160799873593, "grad_norm": 0.05551057015734395, "learning_rate": 1.476615591645867e-05, "loss": 0.8364, "step": 239180 }, { "epoch": 4.199336364753595, "grad_norm": 0.05562441039203986, "learning_rate": 1.4760682752144181e-05, "loss": 0.832, "step": 239190 }, { "epoch": 4.199511929633596, "grad_norm": 0.04861344602947894, "learning_rate": 1.4755210680498372e-05, "loss": 0.8435, "step": 239200 }, { "epoch": 4.199687494513597, "grad_norm": 0.057115163388982815, "learning_rate": 1.4749739701590584e-05, "loss": 0.833, "step": 239210 }, { "epoch": 4.199863059393599, "grad_norm": 0.06944293835484647, "learning_rate": 1.4744269815490108e-05, "loss": 0.8376, "step": 239220 }, { "epoch": 4.2000386242736, "grad_norm": 0.066101515900532, "learning_rate": 1.4738801022266258e-05, "loss": 0.8332, "step": 239230 }, { "epoch": 4.200214189153602, "grad_norm": 0.054092544799123184, "learning_rate": 1.473333332198834e-05, "loss": 0.8395, "step": 239240 }, { "epoch": 4.200389754033603, "grad_norm": 0.05283988460160457, "learning_rate": 1.4727866714725636e-05, "loss": 0.8387, "step": 239250 }, { "epoch": 4.200565318913604, "grad_norm": 0.04564682519469561, "learning_rate": 1.4722401200547376e-05, "loss": 0.8412, "step": 239260 }, { "epoch": 4.200740883793606, "grad_norm": 0.04564044989829147, "learning_rate": 1.4716936779522825e-05, "loss": 0.8437, "step": 239270 }, { "epoch": 4.200916448673607, "grad_norm": 0.0437978729227878, "learning_rate": 1.4711473451721255e-05, "loss": 0.8475, "step": 239280 }, { "epoch": 4.201092013553609, "grad_norm": 0.0559277399925842, "learning_rate": 1.4706011217211817e-05, "loss": 0.8344, "step": 239290 }, { "epoch": 4.20126757843361, "grad_norm": 0.05082950275858786, "learning_rate": 1.4700550076063763e-05, "loss": 0.831, "step": 239300 }, { "epoch": 4.201443143313612, "grad_norm": 0.047734592882326614, "learning_rate": 1.4695090028346273e-05, "loss": 0.8423, "step": 239310 }, { "epoch": 4.201618708193613, "grad_norm": 0.04742461018060397, "learning_rate": 1.4689631074128565e-05, "loss": 0.842, "step": 239320 }, { "epoch": 4.201794273073614, "grad_norm": 0.06667239370607894, "learning_rate": 1.4684173213479743e-05, "loss": 0.8332, "step": 239330 }, { "epoch": 4.201969837953616, "grad_norm": 0.058742184058261766, "learning_rate": 1.467871644646899e-05, "loss": 0.8344, "step": 239340 }, { "epoch": 4.202145402833617, "grad_norm": 0.06134110593429998, "learning_rate": 1.4673260773165465e-05, "loss": 0.8305, "step": 239350 }, { "epoch": 4.202320967713619, "grad_norm": 0.04359962469914271, "learning_rate": 1.4667806193638239e-05, "loss": 0.8433, "step": 239360 }, { "epoch": 4.20249653259362, "grad_norm": 0.048332450896433864, "learning_rate": 1.4662352707956467e-05, "loss": 0.8409, "step": 239370 }, { "epoch": 4.202672097473622, "grad_norm": 0.05246657147172225, "learning_rate": 1.4656900316189251e-05, "loss": 0.8377, "step": 239380 }, { "epoch": 4.202847662353623, "grad_norm": 0.04915531275091929, "learning_rate": 1.4651449018405627e-05, "loss": 0.8302, "step": 239390 }, { "epoch": 4.203023227233624, "grad_norm": 0.055099345760648594, "learning_rate": 1.4645998814674708e-05, "loss": 0.8428, "step": 239400 }, { "epoch": 4.203198792113626, "grad_norm": 0.0672299639432927, "learning_rate": 1.4640549705065518e-05, "loss": 0.8385, "step": 239410 }, { "epoch": 4.203374356993627, "grad_norm": 0.06094646922522416, "learning_rate": 1.4635101689647145e-05, "loss": 0.8377, "step": 239420 }, { "epoch": 4.203549921873629, "grad_norm": 0.049247914204005844, "learning_rate": 1.4629654768488565e-05, "loss": 0.8428, "step": 239430 }, { "epoch": 4.20372548675363, "grad_norm": 0.04481555266259817, "learning_rate": 1.46242089416588e-05, "loss": 0.8403, "step": 239440 }, { "epoch": 4.203901051633631, "grad_norm": 0.057079552484352536, "learning_rate": 1.4618764209226894e-05, "loss": 0.8373, "step": 239450 }, { "epoch": 4.204076616513633, "grad_norm": 0.047648313625265525, "learning_rate": 1.4613320571261777e-05, "loss": 0.8409, "step": 239460 }, { "epoch": 4.204252181393634, "grad_norm": 0.04216260142589827, "learning_rate": 1.4607878027832436e-05, "loss": 0.8406, "step": 239470 }, { "epoch": 4.204427746273636, "grad_norm": 0.04311285402295743, "learning_rate": 1.4602436579007848e-05, "loss": 0.84, "step": 239480 }, { "epoch": 4.204603311153637, "grad_norm": 0.04910500524783706, "learning_rate": 1.4596996224856964e-05, "loss": 0.8379, "step": 239490 }, { "epoch": 4.204778876033639, "grad_norm": 0.04664365335233457, "learning_rate": 1.4591556965448682e-05, "loss": 0.838, "step": 239500 }, { "epoch": 4.20495444091364, "grad_norm": 0.046546588757555206, "learning_rate": 1.4586118800851928e-05, "loss": 0.8419, "step": 239510 }, { "epoch": 4.205130005793641, "grad_norm": 0.05320306832080392, "learning_rate": 1.4580681731135625e-05, "loss": 0.8364, "step": 239520 }, { "epoch": 4.205305570673643, "grad_norm": 0.04640300647198697, "learning_rate": 1.4575245756368633e-05, "loss": 0.8352, "step": 239530 }, { "epoch": 4.205481135553644, "grad_norm": 0.0626675928885754, "learning_rate": 1.4569810876619838e-05, "loss": 0.8456, "step": 239540 }, { "epoch": 4.205656700433646, "grad_norm": 0.05394861355639748, "learning_rate": 1.4564377091958128e-05, "loss": 0.8399, "step": 239550 }, { "epoch": 4.205832265313647, "grad_norm": 0.13375287537607627, "learning_rate": 1.4558944402452298e-05, "loss": 0.8353, "step": 239560 }, { "epoch": 4.206007830193648, "grad_norm": 0.04707292953863844, "learning_rate": 1.455351280817121e-05, "loss": 0.8284, "step": 239570 }, { "epoch": 4.20618339507365, "grad_norm": 0.049319210607717025, "learning_rate": 1.454808230918368e-05, "loss": 0.8424, "step": 239580 }, { "epoch": 4.206358959953651, "grad_norm": 0.049159034782633754, "learning_rate": 1.4542652905558546e-05, "loss": 0.8434, "step": 239590 }, { "epoch": 4.2065345248336525, "grad_norm": 0.05243876094898421, "learning_rate": 1.4537224597364544e-05, "loss": 0.8462, "step": 239600 }, { "epoch": 4.206710089713654, "grad_norm": 0.062094705677215964, "learning_rate": 1.453179738467048e-05, "loss": 0.8351, "step": 239610 }, { "epoch": 4.2068856545936555, "grad_norm": 0.06311591305295802, "learning_rate": 1.4526371267545125e-05, "loss": 0.8328, "step": 239620 }, { "epoch": 4.2070612194736565, "grad_norm": 0.06572120926269463, "learning_rate": 1.4520946246057202e-05, "loss": 0.8402, "step": 239630 }, { "epoch": 4.2072367843536576, "grad_norm": 0.048506757815291686, "learning_rate": 1.4515522320275465e-05, "loss": 0.8339, "step": 239640 }, { "epoch": 4.2074123492336595, "grad_norm": 0.06202590339290872, "learning_rate": 1.4510099490268634e-05, "loss": 0.8388, "step": 239650 }, { "epoch": 4.2075879141136605, "grad_norm": 0.044937578249730284, "learning_rate": 1.4504677756105447e-05, "loss": 0.8328, "step": 239660 }, { "epoch": 4.207763478993662, "grad_norm": 0.053245773704435735, "learning_rate": 1.4499257117854537e-05, "loss": 0.8347, "step": 239670 }, { "epoch": 4.2079390438736635, "grad_norm": 0.046593603774677674, "learning_rate": 1.4493837575584629e-05, "loss": 0.8363, "step": 239680 }, { "epoch": 4.2081146087536645, "grad_norm": 0.04154043578838674, "learning_rate": 1.4488419129364395e-05, "loss": 0.839, "step": 239690 }, { "epoch": 4.208290173633666, "grad_norm": 0.05632678181856761, "learning_rate": 1.448300177926244e-05, "loss": 0.8304, "step": 239700 }, { "epoch": 4.2084657385136675, "grad_norm": 0.04888436198780514, "learning_rate": 1.4477585525347443e-05, "loss": 0.8438, "step": 239710 }, { "epoch": 4.208641303393669, "grad_norm": 0.04612268225629976, "learning_rate": 1.4472170367688042e-05, "loss": 0.8416, "step": 239720 }, { "epoch": 4.20881686827367, "grad_norm": 0.05862468909100746, "learning_rate": 1.4466756306352798e-05, "loss": 0.84, "step": 239730 }, { "epoch": 4.208992433153672, "grad_norm": 0.053176819738471895, "learning_rate": 1.446134334141033e-05, "loss": 0.8435, "step": 239740 }, { "epoch": 4.209167998033673, "grad_norm": 0.05347546392726441, "learning_rate": 1.4455931472929238e-05, "loss": 0.8419, "step": 239750 }, { "epoch": 4.209343562913674, "grad_norm": 0.055170817331794256, "learning_rate": 1.4450520700978102e-05, "loss": 0.8438, "step": 239760 }, { "epoch": 4.209519127793676, "grad_norm": 0.047892550815512594, "learning_rate": 1.444511102562543e-05, "loss": 0.8473, "step": 239770 }, { "epoch": 4.209694692673677, "grad_norm": 0.04542913732760854, "learning_rate": 1.4439702446939788e-05, "loss": 0.836, "step": 239780 }, { "epoch": 4.209870257553679, "grad_norm": 0.05408377941717164, "learning_rate": 1.4434294964989727e-05, "loss": 0.8439, "step": 239790 }, { "epoch": 4.21004582243368, "grad_norm": 0.050882143312471415, "learning_rate": 1.4428888579843725e-05, "loss": 0.8275, "step": 239800 }, { "epoch": 4.210221387313682, "grad_norm": 0.06243959297127503, "learning_rate": 1.4423483291570286e-05, "loss": 0.8364, "step": 239810 }, { "epoch": 4.210396952193683, "grad_norm": 0.05661655305043809, "learning_rate": 1.441807910023791e-05, "loss": 0.841, "step": 239820 }, { "epoch": 4.210572517073684, "grad_norm": 0.05660857478523464, "learning_rate": 1.4412676005915093e-05, "loss": 0.8344, "step": 239830 }, { "epoch": 4.210748081953686, "grad_norm": 0.04767426704510583, "learning_rate": 1.4407274008670232e-05, "loss": 0.8424, "step": 239840 }, { "epoch": 4.210923646833687, "grad_norm": 0.07176836486653354, "learning_rate": 1.4401873108571817e-05, "loss": 0.8408, "step": 239850 }, { "epoch": 4.211099211713689, "grad_norm": 0.04844068680834538, "learning_rate": 1.4396473305688275e-05, "loss": 0.8397, "step": 239860 }, { "epoch": 4.21127477659369, "grad_norm": 0.0567201208637584, "learning_rate": 1.4391074600087995e-05, "loss": 0.8353, "step": 239870 }, { "epoch": 4.211450341473691, "grad_norm": 0.05183923497762161, "learning_rate": 1.438567699183941e-05, "loss": 0.8445, "step": 239880 }, { "epoch": 4.211625906353693, "grad_norm": 0.061980424769381245, "learning_rate": 1.4380280481010906e-05, "loss": 0.8386, "step": 239890 }, { "epoch": 4.211801471233694, "grad_norm": 0.047036437569523123, "learning_rate": 1.4374885067670837e-05, "loss": 0.8444, "step": 239900 }, { "epoch": 4.211977036113696, "grad_norm": 0.08694814501790638, "learning_rate": 1.4369490751887569e-05, "loss": 0.8353, "step": 239910 }, { "epoch": 4.212152600993697, "grad_norm": 0.04468459914659829, "learning_rate": 1.4364097533729453e-05, "loss": 0.8353, "step": 239920 }, { "epoch": 4.212328165873698, "grad_norm": 0.09953494315730398, "learning_rate": 1.4358705413264848e-05, "loss": 0.8385, "step": 239930 }, { "epoch": 4.2125037307537, "grad_norm": 0.04862348740876168, "learning_rate": 1.4353314390562029e-05, "loss": 0.8426, "step": 239940 }, { "epoch": 4.212679295633701, "grad_norm": 0.045509243176999475, "learning_rate": 1.434792446568933e-05, "loss": 0.833, "step": 239950 }, { "epoch": 4.212854860513703, "grad_norm": 0.05819063060678466, "learning_rate": 1.4342535638715046e-05, "loss": 0.835, "step": 239960 }, { "epoch": 4.213030425393704, "grad_norm": 0.05098627665511443, "learning_rate": 1.4337147909707426e-05, "loss": 0.8337, "step": 239970 }, { "epoch": 4.213205990273706, "grad_norm": 0.05256090373784981, "learning_rate": 1.4331761278734759e-05, "loss": 0.8475, "step": 239980 }, { "epoch": 4.213381555153707, "grad_norm": 0.04977669171837467, "learning_rate": 1.4326375745865276e-05, "loss": 0.8447, "step": 239990 }, { "epoch": 4.213557120033708, "grad_norm": 0.0588241158797664, "learning_rate": 1.4320991311167253e-05, "loss": 0.8388, "step": 240000 }, { "epoch": 4.21373268491371, "grad_norm": 0.0691371800545454, "learning_rate": 1.431560797470885e-05, "loss": 0.8397, "step": 240010 }, { "epoch": 4.213908249793711, "grad_norm": 0.08658492739864736, "learning_rate": 1.431022573655832e-05, "loss": 0.8404, "step": 240020 }, { "epoch": 4.214083814673713, "grad_norm": 0.050659362197159975, "learning_rate": 1.4304844596783854e-05, "loss": 0.8384, "step": 240030 }, { "epoch": 4.214259379553714, "grad_norm": 0.04314502990934232, "learning_rate": 1.4299464555453606e-05, "loss": 0.8376, "step": 240040 }, { "epoch": 4.214434944433716, "grad_norm": 0.05785387930395185, "learning_rate": 1.4294085612635754e-05, "loss": 0.8403, "step": 240050 }, { "epoch": 4.214610509313717, "grad_norm": 0.051471994312663895, "learning_rate": 1.4288707768398483e-05, "loss": 0.836, "step": 240060 }, { "epoch": 4.214786074193718, "grad_norm": 0.041762554227087634, "learning_rate": 1.4283331022809876e-05, "loss": 0.8416, "step": 240070 }, { "epoch": 4.21496163907372, "grad_norm": 0.04746770824948644, "learning_rate": 1.4277955375938084e-05, "loss": 0.8416, "step": 240080 }, { "epoch": 4.215137203953721, "grad_norm": 0.08345358592668715, "learning_rate": 1.4272580827851215e-05, "loss": 0.8414, "step": 240090 }, { "epoch": 4.215312768833723, "grad_norm": 0.05417783968134573, "learning_rate": 1.4267207378617395e-05, "loss": 0.8299, "step": 240100 }, { "epoch": 4.215488333713724, "grad_norm": 0.04744012312150704, "learning_rate": 1.426183502830466e-05, "loss": 0.8375, "step": 240110 }, { "epoch": 4.215663898593725, "grad_norm": 0.05021094069534072, "learning_rate": 1.4256463776981088e-05, "loss": 0.8486, "step": 240120 }, { "epoch": 4.215839463473727, "grad_norm": 0.05218723167536544, "learning_rate": 1.425109362471477e-05, "loss": 0.8447, "step": 240130 }, { "epoch": 4.216015028353728, "grad_norm": 0.05336545987882545, "learning_rate": 1.424572457157371e-05, "loss": 0.8403, "step": 240140 }, { "epoch": 4.21619059323373, "grad_norm": 0.049889094040252206, "learning_rate": 1.4240356617625942e-05, "loss": 0.8431, "step": 240150 }, { "epoch": 4.216366158113731, "grad_norm": 0.05714527300062633, "learning_rate": 1.4234989762939477e-05, "loss": 0.8429, "step": 240160 }, { "epoch": 4.216541722993733, "grad_norm": 0.050431916769047104, "learning_rate": 1.4229624007582335e-05, "loss": 0.8346, "step": 240170 }, { "epoch": 4.216717287873734, "grad_norm": 0.0553296566216928, "learning_rate": 1.4224259351622482e-05, "loss": 0.8403, "step": 240180 }, { "epoch": 4.216892852753735, "grad_norm": 0.05257262810740638, "learning_rate": 1.4218895795127888e-05, "loss": 0.8418, "step": 240190 }, { "epoch": 4.217068417633737, "grad_norm": 0.06062336036857276, "learning_rate": 1.4213533338166533e-05, "loss": 0.8425, "step": 240200 }, { "epoch": 4.217243982513738, "grad_norm": 0.054374654983159264, "learning_rate": 1.4208171980806317e-05, "loss": 0.8378, "step": 240210 }, { "epoch": 4.21741954739374, "grad_norm": 0.046218201194533876, "learning_rate": 1.420281172311521e-05, "loss": 0.8376, "step": 240220 }, { "epoch": 4.217595112273741, "grad_norm": 0.047402336224244716, "learning_rate": 1.4197452565161124e-05, "loss": 0.8384, "step": 240230 }, { "epoch": 4.217770677153742, "grad_norm": 0.06303333659762887, "learning_rate": 1.419209450701193e-05, "loss": 0.8442, "step": 240240 }, { "epoch": 4.217946242033744, "grad_norm": 0.043595678834713734, "learning_rate": 1.4186737548735535e-05, "loss": 0.844, "step": 240250 }, { "epoch": 4.218121806913745, "grad_norm": 0.07179739657347596, "learning_rate": 1.4181381690399814e-05, "loss": 0.8404, "step": 240260 }, { "epoch": 4.218297371793747, "grad_norm": 0.036985896921491866, "learning_rate": 1.4176026932072645e-05, "loss": 0.8377, "step": 240270 }, { "epoch": 4.218472936673748, "grad_norm": 0.043087754280461216, "learning_rate": 1.4170673273821838e-05, "loss": 0.8362, "step": 240280 }, { "epoch": 4.21864850155375, "grad_norm": 0.05084117062269771, "learning_rate": 1.4165320715715235e-05, "loss": 0.8471, "step": 240290 }, { "epoch": 4.218824066433751, "grad_norm": 0.053270904105779886, "learning_rate": 1.4159969257820681e-05, "loss": 0.8417, "step": 240300 }, { "epoch": 4.218999631313752, "grad_norm": 0.050441131056717985, "learning_rate": 1.4154618900205943e-05, "loss": 0.8385, "step": 240310 }, { "epoch": 4.219175196193754, "grad_norm": 0.053592334077166064, "learning_rate": 1.4149269642938828e-05, "loss": 0.8453, "step": 240320 }, { "epoch": 4.219350761073755, "grad_norm": 0.07871029781810085, "learning_rate": 1.4143921486087105e-05, "loss": 0.8377, "step": 240330 }, { "epoch": 4.2195263259537565, "grad_norm": 0.05194243840494294, "learning_rate": 1.413857442971857e-05, "loss": 0.8313, "step": 240340 }, { "epoch": 4.219701890833758, "grad_norm": 0.05485290938898338, "learning_rate": 1.4133228473900931e-05, "loss": 0.8368, "step": 240350 }, { "epoch": 4.219877455713759, "grad_norm": 0.053053207872248064, "learning_rate": 1.412788361870192e-05, "loss": 0.8368, "step": 240360 }, { "epoch": 4.2200530205937605, "grad_norm": 0.05628992490649282, "learning_rate": 1.4122539864189294e-05, "loss": 0.8407, "step": 240370 }, { "epoch": 4.220228585473762, "grad_norm": 0.04524397306460336, "learning_rate": 1.4117197210430722e-05, "loss": 0.8381, "step": 240380 }, { "epoch": 4.2204041503537635, "grad_norm": 0.04696154424649314, "learning_rate": 1.4111855657493923e-05, "loss": 0.84, "step": 240390 }, { "epoch": 4.2205797152337645, "grad_norm": 0.05301748565743717, "learning_rate": 1.4106515205446566e-05, "loss": 0.84, "step": 240400 }, { "epoch": 4.220755280113766, "grad_norm": 0.06306794049682851, "learning_rate": 1.4101175854356307e-05, "loss": 0.8381, "step": 240410 }, { "epoch": 4.2209308449937675, "grad_norm": 0.046748362126222576, "learning_rate": 1.4095837604290826e-05, "loss": 0.8389, "step": 240420 }, { "epoch": 4.2211064098737685, "grad_norm": 0.0547410516824761, "learning_rate": 1.4090500455317687e-05, "loss": 0.8406, "step": 240430 }, { "epoch": 4.22128197475377, "grad_norm": 0.056219745924112656, "learning_rate": 1.4085164407504608e-05, "loss": 0.8365, "step": 240440 }, { "epoch": 4.2214575396337715, "grad_norm": 0.05777817915772739, "learning_rate": 1.4079829460919127e-05, "loss": 0.8461, "step": 240450 }, { "epoch": 4.221633104513773, "grad_norm": 0.05958441483850585, "learning_rate": 1.4074495615628879e-05, "loss": 0.8457, "step": 240460 }, { "epoch": 4.221808669393774, "grad_norm": 0.05518231555715791, "learning_rate": 1.4069162871701444e-05, "loss": 0.8323, "step": 240470 }, { "epoch": 4.2219842342737754, "grad_norm": 0.05577944563249251, "learning_rate": 1.406383122920435e-05, "loss": 0.8354, "step": 240480 }, { "epoch": 4.222159799153777, "grad_norm": 0.04828805365640686, "learning_rate": 1.4058500688205175e-05, "loss": 0.8362, "step": 240490 }, { "epoch": 4.222335364033778, "grad_norm": 0.047802613108813646, "learning_rate": 1.4053171248771471e-05, "loss": 0.8436, "step": 240500 }, { "epoch": 4.22251092891378, "grad_norm": 0.05320671800272749, "learning_rate": 1.4047842910970754e-05, "loss": 0.8395, "step": 240510 }, { "epoch": 4.222686493793781, "grad_norm": 0.04654654936526097, "learning_rate": 1.4042515674870527e-05, "loss": 0.8428, "step": 240520 }, { "epoch": 4.222862058673783, "grad_norm": 0.06206292906587966, "learning_rate": 1.403718954053829e-05, "loss": 0.8493, "step": 240530 }, { "epoch": 4.223037623553784, "grad_norm": 0.044050355725322975, "learning_rate": 1.4031864508041544e-05, "loss": 0.8404, "step": 240540 }, { "epoch": 4.223213188433785, "grad_norm": 0.05820797378277896, "learning_rate": 1.4026540577447722e-05, "loss": 0.839, "step": 240550 }, { "epoch": 4.223388753313787, "grad_norm": 0.0522229121803316, "learning_rate": 1.4021217748824295e-05, "loss": 0.8463, "step": 240560 }, { "epoch": 4.223564318193788, "grad_norm": 0.04695277183924568, "learning_rate": 1.4015896022238728e-05, "loss": 0.8433, "step": 240570 }, { "epoch": 4.22373988307379, "grad_norm": 0.0456566881710039, "learning_rate": 1.4010575397758413e-05, "loss": 0.8432, "step": 240580 }, { "epoch": 4.223915447953791, "grad_norm": 0.06104497596843356, "learning_rate": 1.4005255875450808e-05, "loss": 0.8331, "step": 240590 }, { "epoch": 4.224091012833792, "grad_norm": 0.04638142712089257, "learning_rate": 1.3999937455383224e-05, "loss": 0.8437, "step": 240600 }, { "epoch": 4.224266577713794, "grad_norm": 0.05002820889684861, "learning_rate": 1.399462013762316e-05, "loss": 0.8425, "step": 240610 }, { "epoch": 4.224442142593795, "grad_norm": 0.09255004555767461, "learning_rate": 1.3989303922237914e-05, "loss": 0.8393, "step": 240620 }, { "epoch": 4.224617707473797, "grad_norm": 0.08894637088355647, "learning_rate": 1.3983988809294854e-05, "loss": 0.8496, "step": 240630 }, { "epoch": 4.224793272353798, "grad_norm": 0.05029105431129263, "learning_rate": 1.3978674798861363e-05, "loss": 0.8346, "step": 240640 }, { "epoch": 4.2249688372338, "grad_norm": 0.05047268795774555, "learning_rate": 1.3973361891004707e-05, "loss": 0.8432, "step": 240650 }, { "epoch": 4.225144402113801, "grad_norm": 0.04528177602203799, "learning_rate": 1.3968050085792245e-05, "loss": 0.8433, "step": 240660 }, { "epoch": 4.225319966993802, "grad_norm": 0.053971114611789904, "learning_rate": 1.396273938329126e-05, "loss": 0.8308, "step": 240670 }, { "epoch": 4.225495531873804, "grad_norm": 0.044148444454506414, "learning_rate": 1.3957429783569075e-05, "loss": 0.8357, "step": 240680 }, { "epoch": 4.225671096753805, "grad_norm": 0.04691361856806417, "learning_rate": 1.3952121286692915e-05, "loss": 0.8334, "step": 240690 }, { "epoch": 4.225846661633807, "grad_norm": 0.049931625949768375, "learning_rate": 1.3946813892730053e-05, "loss": 0.8393, "step": 240700 }, { "epoch": 4.226022226513808, "grad_norm": 0.049982279863026356, "learning_rate": 1.394150760174778e-05, "loss": 0.838, "step": 240710 }, { "epoch": 4.22619779139381, "grad_norm": 0.066776172043513, "learning_rate": 1.3936202413813248e-05, "loss": 0.8322, "step": 240720 }, { "epoch": 4.226373356273811, "grad_norm": 0.04837134120172976, "learning_rate": 1.393089832899373e-05, "loss": 0.8412, "step": 240730 }, { "epoch": 4.226548921153812, "grad_norm": 0.05809968415623063, "learning_rate": 1.3925595347356444e-05, "loss": 0.8335, "step": 240740 }, { "epoch": 4.226724486033814, "grad_norm": 0.0687266888138864, "learning_rate": 1.3920293468968523e-05, "loss": 0.8409, "step": 240750 }, { "epoch": 4.226900050913815, "grad_norm": 0.04815078801560053, "learning_rate": 1.3914992693897191e-05, "loss": 0.8397, "step": 240760 }, { "epoch": 4.227075615793817, "grad_norm": 0.059629883773512596, "learning_rate": 1.3909693022209552e-05, "loss": 0.8484, "step": 240770 }, { "epoch": 4.227251180673818, "grad_norm": 0.055217984386785796, "learning_rate": 1.3904394453972836e-05, "loss": 0.8411, "step": 240780 }, { "epoch": 4.227426745553819, "grad_norm": 0.05128201216686496, "learning_rate": 1.389909698925411e-05, "loss": 0.8407, "step": 240790 }, { "epoch": 4.227602310433821, "grad_norm": 0.046602934455631455, "learning_rate": 1.3893800628120523e-05, "loss": 0.8365, "step": 240800 }, { "epoch": 4.227777875313822, "grad_norm": 0.06894930359639355, "learning_rate": 1.3888505370639196e-05, "loss": 0.837, "step": 240810 }, { "epoch": 4.227953440193824, "grad_norm": 0.0429988954065823, "learning_rate": 1.3883211216877177e-05, "loss": 0.8431, "step": 240820 }, { "epoch": 4.228129005073825, "grad_norm": 0.05739031139367823, "learning_rate": 1.3877918166901573e-05, "loss": 0.8384, "step": 240830 }, { "epoch": 4.228304569953827, "grad_norm": 0.05670047754792792, "learning_rate": 1.3872626220779439e-05, "loss": 0.8428, "step": 240840 }, { "epoch": 4.228480134833828, "grad_norm": 0.04657814276538687, "learning_rate": 1.386733537857785e-05, "loss": 0.8419, "step": 240850 }, { "epoch": 4.228655699713829, "grad_norm": 0.055545684425114164, "learning_rate": 1.3862045640363796e-05, "loss": 0.8355, "step": 240860 }, { "epoch": 4.228831264593831, "grad_norm": 0.05899241344619098, "learning_rate": 1.3856757006204323e-05, "loss": 0.8354, "step": 240870 }, { "epoch": 4.229006829473832, "grad_norm": 0.03928037243323132, "learning_rate": 1.3851469476166473e-05, "loss": 0.844, "step": 240880 }, { "epoch": 4.229182394353834, "grad_norm": 0.05520586236153388, "learning_rate": 1.3846183050317174e-05, "loss": 0.8381, "step": 240890 }, { "epoch": 4.229357959233835, "grad_norm": 0.06010785961894963, "learning_rate": 1.3840897728723443e-05, "loss": 0.8326, "step": 240900 }, { "epoch": 4.229533524113836, "grad_norm": 0.053308526762104354, "learning_rate": 1.3835613511452262e-05, "loss": 0.8384, "step": 240910 }, { "epoch": 4.229709088993838, "grad_norm": 0.04631867079015561, "learning_rate": 1.3830330398570543e-05, "loss": 0.8416, "step": 240920 }, { "epoch": 4.229884653873839, "grad_norm": 0.07720090397818026, "learning_rate": 1.3825048390145256e-05, "loss": 0.8348, "step": 240930 }, { "epoch": 4.230060218753841, "grad_norm": 0.05217843192680856, "learning_rate": 1.3819767486243282e-05, "loss": 0.8401, "step": 240940 }, { "epoch": 4.230235783633842, "grad_norm": 0.06353922630971842, "learning_rate": 1.3814487686931593e-05, "loss": 0.839, "step": 240950 }, { "epoch": 4.230411348513844, "grad_norm": 0.05644149516119645, "learning_rate": 1.3809208992277048e-05, "loss": 0.8333, "step": 240960 }, { "epoch": 4.230586913393845, "grad_norm": 0.06049510675073393, "learning_rate": 1.3803931402346523e-05, "loss": 0.8444, "step": 240970 }, { "epoch": 4.230762478273846, "grad_norm": 0.05616164899380814, "learning_rate": 1.379865491720693e-05, "loss": 0.8438, "step": 240980 }, { "epoch": 4.230938043153848, "grad_norm": 0.055454130400046424, "learning_rate": 1.3793379536925056e-05, "loss": 0.837, "step": 240990 }, { "epoch": 4.231113608033849, "grad_norm": 0.04562442800242614, "learning_rate": 1.3788105261567785e-05, "loss": 0.8431, "step": 241000 }, { "epoch": 4.231289172913851, "grad_norm": 0.04735038968235897, "learning_rate": 1.3782832091201934e-05, "loss": 0.8381, "step": 241010 }, { "epoch": 4.231464737793852, "grad_norm": 0.05580196258080762, "learning_rate": 1.3777560025894337e-05, "loss": 0.84, "step": 241020 }, { "epoch": 4.231640302673853, "grad_norm": 0.05621500040058437, "learning_rate": 1.3772289065711744e-05, "loss": 0.8319, "step": 241030 }, { "epoch": 4.231815867553855, "grad_norm": 0.042686868769861386, "learning_rate": 1.3767019210720975e-05, "loss": 0.8385, "step": 241040 }, { "epoch": 4.231991432433856, "grad_norm": 0.053606467223112374, "learning_rate": 1.3761750460988809e-05, "loss": 0.8372, "step": 241050 }, { "epoch": 4.232166997313858, "grad_norm": 0.05685169774326579, "learning_rate": 1.3756482816581962e-05, "loss": 0.8415, "step": 241060 }, { "epoch": 4.232342562193859, "grad_norm": 0.050604534088638825, "learning_rate": 1.3751216277567196e-05, "loss": 0.8318, "step": 241070 }, { "epoch": 4.2325181270738605, "grad_norm": 0.04520950670625645, "learning_rate": 1.3745950844011262e-05, "loss": 0.8354, "step": 241080 }, { "epoch": 4.232693691953862, "grad_norm": 0.065133991179496, "learning_rate": 1.374068651598083e-05, "loss": 0.8418, "step": 241090 }, { "epoch": 4.232869256833863, "grad_norm": 0.0641231266871208, "learning_rate": 1.3735423293542641e-05, "loss": 0.8358, "step": 241100 }, { "epoch": 4.2330448217138645, "grad_norm": 0.0450861981607851, "learning_rate": 1.3730161176763334e-05, "loss": 0.8411, "step": 241110 }, { "epoch": 4.233220386593866, "grad_norm": 0.056462589954999455, "learning_rate": 1.3724900165709644e-05, "loss": 0.8438, "step": 241120 }, { "epoch": 4.2333959514738675, "grad_norm": 0.0661697314151564, "learning_rate": 1.3719640260448172e-05, "loss": 0.8383, "step": 241130 }, { "epoch": 4.2335715163538685, "grad_norm": 0.05177688953728711, "learning_rate": 1.371438146104559e-05, "loss": 0.8342, "step": 241140 }, { "epoch": 4.23374708123387, "grad_norm": 0.05528178848510979, "learning_rate": 1.3709123767568543e-05, "loss": 0.8402, "step": 241150 }, { "epoch": 4.2339226461138715, "grad_norm": 0.04946842067642066, "learning_rate": 1.3703867180083601e-05, "loss": 0.8408, "step": 241160 }, { "epoch": 4.2340982109938725, "grad_norm": 0.052757891718693294, "learning_rate": 1.36986116986574e-05, "loss": 0.8411, "step": 241170 }, { "epoch": 4.234273775873874, "grad_norm": 0.07054115415634576, "learning_rate": 1.3693357323356512e-05, "loss": 0.8436, "step": 241180 }, { "epoch": 4.2344493407538755, "grad_norm": 0.056808358404189574, "learning_rate": 1.368810405424755e-05, "loss": 0.8377, "step": 241190 }, { "epoch": 4.234624905633877, "grad_norm": 0.08088815300851024, "learning_rate": 1.3682851891397014e-05, "loss": 0.8336, "step": 241200 }, { "epoch": 4.234800470513878, "grad_norm": 0.047856268531734684, "learning_rate": 1.3677600834871473e-05, "loss": 0.8417, "step": 241210 }, { "epoch": 4.2349760353938795, "grad_norm": 0.05776089537884784, "learning_rate": 1.3672350884737497e-05, "loss": 0.8379, "step": 241220 }, { "epoch": 4.235151600273881, "grad_norm": 0.08728035332027381, "learning_rate": 1.3667102041061542e-05, "loss": 0.8408, "step": 241230 }, { "epoch": 4.235327165153882, "grad_norm": 0.057246920152760686, "learning_rate": 1.3661854303910146e-05, "loss": 0.8432, "step": 241240 }, { "epoch": 4.235502730033884, "grad_norm": 0.05379176737311252, "learning_rate": 1.3656607673349819e-05, "loss": 0.8383, "step": 241250 }, { "epoch": 4.235678294913885, "grad_norm": 0.054842426030088216, "learning_rate": 1.3651362149446989e-05, "loss": 0.8314, "step": 241260 }, { "epoch": 4.235853859793886, "grad_norm": 0.0691559803782567, "learning_rate": 1.3646117732268155e-05, "loss": 0.8335, "step": 241270 }, { "epoch": 4.236029424673888, "grad_norm": 0.04717467703286169, "learning_rate": 1.3640874421879715e-05, "loss": 0.8396, "step": 241280 }, { "epoch": 4.236204989553889, "grad_norm": 0.04390337480728649, "learning_rate": 1.3635632218348185e-05, "loss": 0.8351, "step": 241290 }, { "epoch": 4.236380554433891, "grad_norm": 0.07006956524728941, "learning_rate": 1.3630391121739913e-05, "loss": 0.8329, "step": 241300 }, { "epoch": 4.236556119313892, "grad_norm": 0.05060373249719249, "learning_rate": 1.3625151132121325e-05, "loss": 0.8459, "step": 241310 }, { "epoch": 4.236731684193894, "grad_norm": 0.05153749019459427, "learning_rate": 1.3619912249558842e-05, "loss": 0.8323, "step": 241320 }, { "epoch": 4.236907249073895, "grad_norm": 0.05963429071528505, "learning_rate": 1.3614674474118799e-05, "loss": 0.844, "step": 241330 }, { "epoch": 4.237082813953896, "grad_norm": 0.056053586583545895, "learning_rate": 1.3609437805867566e-05, "loss": 0.8341, "step": 241340 }, { "epoch": 4.237258378833898, "grad_norm": 0.06639020968076868, "learning_rate": 1.3604202244871517e-05, "loss": 0.8251, "step": 241350 }, { "epoch": 4.237433943713899, "grad_norm": 0.04472469274322239, "learning_rate": 1.3598967791196996e-05, "loss": 0.8385, "step": 241360 }, { "epoch": 4.237609508593901, "grad_norm": 0.0700762583481631, "learning_rate": 1.3593734444910274e-05, "loss": 0.8286, "step": 241370 }, { "epoch": 4.237785073473902, "grad_norm": 0.060761300054985305, "learning_rate": 1.3588502206077696e-05, "loss": 0.8329, "step": 241380 }, { "epoch": 4.237960638353904, "grad_norm": 0.04791565911846247, "learning_rate": 1.3583271074765569e-05, "loss": 0.8335, "step": 241390 }, { "epoch": 4.238136203233905, "grad_norm": 0.07902045392038887, "learning_rate": 1.3578041051040125e-05, "loss": 0.8427, "step": 241400 }, { "epoch": 4.238311768113906, "grad_norm": 0.05537030605835103, "learning_rate": 1.3572812134967652e-05, "loss": 0.8297, "step": 241410 }, { "epoch": 4.238487332993908, "grad_norm": 0.0529074222800093, "learning_rate": 1.356758432661443e-05, "loss": 0.8369, "step": 241420 }, { "epoch": 4.238662897873909, "grad_norm": 0.05572481285708759, "learning_rate": 1.3562357626046645e-05, "loss": 0.8407, "step": 241430 }, { "epoch": 4.238838462753911, "grad_norm": 0.04327310847845957, "learning_rate": 1.3557132033330577e-05, "loss": 0.8402, "step": 241440 }, { "epoch": 4.239014027633912, "grad_norm": 0.05329100753581048, "learning_rate": 1.3551907548532347e-05, "loss": 0.8365, "step": 241450 }, { "epoch": 4.239189592513913, "grad_norm": 0.04757568173762572, "learning_rate": 1.3546684171718266e-05, "loss": 0.8364, "step": 241460 }, { "epoch": 4.239365157393915, "grad_norm": 0.05326145461173357, "learning_rate": 1.3541461902954432e-05, "loss": 0.8453, "step": 241470 }, { "epoch": 4.239540722273916, "grad_norm": 0.046160820900204624, "learning_rate": 1.3536240742307029e-05, "loss": 0.842, "step": 241480 }, { "epoch": 4.239716287153918, "grad_norm": 0.05215502272625961, "learning_rate": 1.3531020689842241e-05, "loss": 0.8455, "step": 241490 }, { "epoch": 4.239891852033919, "grad_norm": 0.055413404394274654, "learning_rate": 1.352580174562617e-05, "loss": 0.8374, "step": 241500 }, { "epoch": 4.24006741691392, "grad_norm": 0.06289976867026521, "learning_rate": 1.3520583909724955e-05, "loss": 0.8385, "step": 241510 }, { "epoch": 4.240242981793922, "grad_norm": 0.04710476060465758, "learning_rate": 1.3515367182204699e-05, "loss": 0.8369, "step": 241520 }, { "epoch": 4.240418546673923, "grad_norm": 0.07120314039183764, "learning_rate": 1.3510151563131543e-05, "loss": 0.8338, "step": 241530 }, { "epoch": 4.240594111553925, "grad_norm": 0.04699338177894847, "learning_rate": 1.3504937052571501e-05, "loss": 0.8355, "step": 241540 }, { "epoch": 4.240769676433926, "grad_norm": 0.055008831673032825, "learning_rate": 1.3499723650590679e-05, "loss": 0.8393, "step": 241550 }, { "epoch": 4.240945241313928, "grad_norm": 0.04340659028650438, "learning_rate": 1.3494511357255149e-05, "loss": 0.8351, "step": 241560 }, { "epoch": 4.241120806193929, "grad_norm": 0.04524228874865247, "learning_rate": 1.34893001726309e-05, "loss": 0.8463, "step": 241570 }, { "epoch": 4.24129637107393, "grad_norm": 0.06601588060408757, "learning_rate": 1.3484090096783993e-05, "loss": 0.8406, "step": 241580 }, { "epoch": 4.241471935953932, "grad_norm": 0.05117474832007829, "learning_rate": 1.3478881129780462e-05, "loss": 0.8434, "step": 241590 }, { "epoch": 4.241647500833933, "grad_norm": 0.04271839031463982, "learning_rate": 1.3473673271686266e-05, "loss": 0.8401, "step": 241600 }, { "epoch": 4.241823065713935, "grad_norm": 0.049168451551456785, "learning_rate": 1.3468466522567414e-05, "loss": 0.8362, "step": 241610 }, { "epoch": 4.241998630593936, "grad_norm": 0.04647121969705215, "learning_rate": 1.3463260882489834e-05, "loss": 0.8372, "step": 241620 }, { "epoch": 4.242174195473938, "grad_norm": 0.05493191534055402, "learning_rate": 1.345805635151955e-05, "loss": 0.8371, "step": 241630 }, { "epoch": 4.242349760353939, "grad_norm": 0.05069818333730023, "learning_rate": 1.3452852929722453e-05, "loss": 0.8373, "step": 241640 }, { "epoch": 4.24252532523394, "grad_norm": 0.054294577751108346, "learning_rate": 1.3447650617164482e-05, "loss": 0.8409, "step": 241650 }, { "epoch": 4.242700890113942, "grad_norm": 0.05028978979192163, "learning_rate": 1.3442449413911582e-05, "loss": 0.8341, "step": 241660 }, { "epoch": 4.242876454993943, "grad_norm": 0.06698424135856194, "learning_rate": 1.3437249320029601e-05, "loss": 0.8383, "step": 241670 }, { "epoch": 4.243052019873945, "grad_norm": 0.043619779832724206, "learning_rate": 1.3432050335584458e-05, "loss": 0.8472, "step": 241680 }, { "epoch": 4.243227584753946, "grad_norm": 0.05055235389471156, "learning_rate": 1.3426852460642016e-05, "loss": 0.8414, "step": 241690 }, { "epoch": 4.243403149633947, "grad_norm": 0.045345716873456676, "learning_rate": 1.3421655695268164e-05, "loss": 0.8394, "step": 241700 }, { "epoch": 4.243578714513949, "grad_norm": 0.049473932433064714, "learning_rate": 1.3416460039528688e-05, "loss": 0.8457, "step": 241710 }, { "epoch": 4.24375427939395, "grad_norm": 0.045044453442434976, "learning_rate": 1.3411265493489463e-05, "loss": 0.8376, "step": 241720 }, { "epoch": 4.243929844273952, "grad_norm": 0.05530692626906091, "learning_rate": 1.3406072057216303e-05, "loss": 0.841, "step": 241730 }, { "epoch": 4.244105409153953, "grad_norm": 0.07602651293921141, "learning_rate": 1.3400879730774968e-05, "loss": 0.8406, "step": 241740 }, { "epoch": 4.244280974033955, "grad_norm": 0.05802822238993708, "learning_rate": 1.3395688514231287e-05, "loss": 0.8358, "step": 241750 }, { "epoch": 4.244456538913956, "grad_norm": 0.05814539089112582, "learning_rate": 1.3390498407651036e-05, "loss": 0.8459, "step": 241760 }, { "epoch": 4.244632103793957, "grad_norm": 0.04798526272991988, "learning_rate": 1.338530941109995e-05, "loss": 0.8365, "step": 241770 }, { "epoch": 4.244807668673959, "grad_norm": 0.04238992162764868, "learning_rate": 1.3380121524643796e-05, "loss": 0.8422, "step": 241780 }, { "epoch": 4.24498323355396, "grad_norm": 0.04576127237619247, "learning_rate": 1.3374934748348258e-05, "loss": 0.8361, "step": 241790 }, { "epoch": 4.245158798433962, "grad_norm": 0.05026444726832254, "learning_rate": 1.3369749082279131e-05, "loss": 0.8398, "step": 241800 }, { "epoch": 4.245334363313963, "grad_norm": 0.04747487455059191, "learning_rate": 1.3364564526502052e-05, "loss": 0.8497, "step": 241810 }, { "epoch": 4.245509928193964, "grad_norm": 0.04854943535687436, "learning_rate": 1.335938108108274e-05, "loss": 0.8355, "step": 241820 }, { "epoch": 4.245685493073966, "grad_norm": 0.05761305444321205, "learning_rate": 1.3354198746086887e-05, "loss": 0.8376, "step": 241830 }, { "epoch": 4.245861057953967, "grad_norm": 0.0545071224398786, "learning_rate": 1.3349017521580112e-05, "loss": 0.8458, "step": 241840 }, { "epoch": 4.2460366228339685, "grad_norm": 0.04773444746289833, "learning_rate": 1.3343837407628076e-05, "loss": 0.8345, "step": 241850 }, { "epoch": 4.24621218771397, "grad_norm": 0.04122744429185112, "learning_rate": 1.333865840429643e-05, "loss": 0.8403, "step": 241860 }, { "epoch": 4.2463877525939715, "grad_norm": 0.04899755247983229, "learning_rate": 1.3333480511650793e-05, "loss": 0.8369, "step": 241870 }, { "epoch": 4.2465633174739725, "grad_norm": 0.04453043987676115, "learning_rate": 1.3328303729756744e-05, "loss": 0.8392, "step": 241880 }, { "epoch": 4.246738882353974, "grad_norm": 0.05597401235032556, "learning_rate": 1.3323128058679896e-05, "loss": 0.8353, "step": 241890 }, { "epoch": 4.2469144472339755, "grad_norm": 0.05977092863334185, "learning_rate": 1.331795349848583e-05, "loss": 0.8355, "step": 241900 }, { "epoch": 4.2470900121139765, "grad_norm": 0.06648765200072392, "learning_rate": 1.3312780049240076e-05, "loss": 0.828, "step": 241910 }, { "epoch": 4.247265576993978, "grad_norm": 0.05691198781292603, "learning_rate": 1.3307607711008208e-05, "loss": 0.8378, "step": 241920 }, { "epoch": 4.2474411418739795, "grad_norm": 0.044902025117654835, "learning_rate": 1.3302436483855777e-05, "loss": 0.8406, "step": 241930 }, { "epoch": 4.2476167067539805, "grad_norm": 0.06328920593771623, "learning_rate": 1.3297266367848258e-05, "loss": 0.836, "step": 241940 }, { "epoch": 4.247792271633982, "grad_norm": 0.06493181293253505, "learning_rate": 1.3292097363051204e-05, "loss": 0.8366, "step": 241950 }, { "epoch": 4.2479678365139835, "grad_norm": 0.05924680715192928, "learning_rate": 1.3286929469530055e-05, "loss": 0.8415, "step": 241960 }, { "epoch": 4.248143401393985, "grad_norm": 0.04917902992132883, "learning_rate": 1.3281762687350348e-05, "loss": 0.8391, "step": 241970 }, { "epoch": 4.248318966273986, "grad_norm": 0.04990182963370488, "learning_rate": 1.32765970165775e-05, "loss": 0.8396, "step": 241980 }, { "epoch": 4.248494531153988, "grad_norm": 0.06389512409173101, "learning_rate": 1.327143245727698e-05, "loss": 0.8386, "step": 241990 }, { "epoch": 4.248670096033989, "grad_norm": 0.05978384949998291, "learning_rate": 1.3266269009514254e-05, "loss": 0.8304, "step": 242000 }, { "epoch": 4.24884566091399, "grad_norm": 0.07304571696560898, "learning_rate": 1.3261106673354685e-05, "loss": 0.8352, "step": 242010 }, { "epoch": 4.249021225793992, "grad_norm": 0.05759227297358204, "learning_rate": 1.3255945448863702e-05, "loss": 0.846, "step": 242020 }, { "epoch": 4.249196790673993, "grad_norm": 0.05271365190010736, "learning_rate": 1.325078533610672e-05, "loss": 0.8336, "step": 242030 }, { "epoch": 4.249372355553995, "grad_norm": 0.03825835536314534, "learning_rate": 1.3245626335149116e-05, "loss": 0.8415, "step": 242040 }, { "epoch": 4.249547920433996, "grad_norm": 0.048913933679321134, "learning_rate": 1.3240468446056231e-05, "loss": 0.8327, "step": 242050 }, { "epoch": 4.249723485313998, "grad_norm": 0.05629054836584174, "learning_rate": 1.3235311668893425e-05, "loss": 0.8414, "step": 242060 }, { "epoch": 4.249899050193999, "grad_norm": 0.06005281795549607, "learning_rate": 1.3230156003726063e-05, "loss": 0.8431, "step": 242070 }, { "epoch": 4.250074615074, "grad_norm": 0.047164499174835096, "learning_rate": 1.3225001450619426e-05, "loss": 0.8468, "step": 242080 }, { "epoch": 4.250250179954002, "grad_norm": 0.058636207105191525, "learning_rate": 1.3219848009638838e-05, "loss": 0.8359, "step": 242090 }, { "epoch": 4.250425744834003, "grad_norm": 0.04671129157323506, "learning_rate": 1.3214695680849625e-05, "loss": 0.8367, "step": 242100 }, { "epoch": 4.250601309714005, "grad_norm": 0.06201569893386411, "learning_rate": 1.3209544464317019e-05, "loss": 0.8282, "step": 242110 }, { "epoch": 4.250776874594006, "grad_norm": 0.0462313319673513, "learning_rate": 1.320439436010633e-05, "loss": 0.8347, "step": 242120 }, { "epoch": 4.250952439474007, "grad_norm": 0.04549540082111642, "learning_rate": 1.3199245368282748e-05, "loss": 0.832, "step": 242130 }, { "epoch": 4.251128004354009, "grad_norm": 0.06206307774208999, "learning_rate": 1.3194097488911591e-05, "loss": 0.8347, "step": 242140 }, { "epoch": 4.25130356923401, "grad_norm": 0.04957044994972902, "learning_rate": 1.3188950722058039e-05, "loss": 0.8376, "step": 242150 }, { "epoch": 4.251479134114012, "grad_norm": 0.04402436530631118, "learning_rate": 1.3183805067787299e-05, "loss": 0.8371, "step": 242160 }, { "epoch": 4.251654698994013, "grad_norm": 0.049600885773311704, "learning_rate": 1.317866052616461e-05, "loss": 0.8422, "step": 242170 }, { "epoch": 4.251830263874014, "grad_norm": 0.04567852643834656, "learning_rate": 1.317351709725509e-05, "loss": 0.8346, "step": 242180 }, { "epoch": 4.252005828754016, "grad_norm": 0.04504928519518124, "learning_rate": 1.3168374781123941e-05, "loss": 0.8357, "step": 242190 }, { "epoch": 4.252181393634017, "grad_norm": 0.04602975418105225, "learning_rate": 1.3163233577836329e-05, "loss": 0.8401, "step": 242200 }, { "epoch": 4.252356958514019, "grad_norm": 0.056364236838173196, "learning_rate": 1.3158093487457388e-05, "loss": 0.8388, "step": 242210 }, { "epoch": 4.25253252339402, "grad_norm": 0.06238030855983602, "learning_rate": 1.315295451005223e-05, "loss": 0.8443, "step": 242220 }, { "epoch": 4.252708088274022, "grad_norm": 0.047276020684696225, "learning_rate": 1.3147816645685965e-05, "loss": 0.8385, "step": 242230 }, { "epoch": 4.252883653154023, "grad_norm": 0.05531504773045428, "learning_rate": 1.314267989442372e-05, "loss": 0.8385, "step": 242240 }, { "epoch": 4.253059218034024, "grad_norm": 0.048641261269313826, "learning_rate": 1.313754425633054e-05, "loss": 0.8393, "step": 242250 }, { "epoch": 4.253234782914026, "grad_norm": 0.04671435683780637, "learning_rate": 1.3132409731471512e-05, "loss": 0.8348, "step": 242260 }, { "epoch": 4.253410347794027, "grad_norm": 0.05932493716049996, "learning_rate": 1.3127276319911718e-05, "loss": 0.8323, "step": 242270 }, { "epoch": 4.253585912674029, "grad_norm": 0.04876420050760672, "learning_rate": 1.3122144021716149e-05, "loss": 0.846, "step": 242280 }, { "epoch": 4.25376147755403, "grad_norm": 0.05942522786461549, "learning_rate": 1.3117012836949868e-05, "loss": 0.8357, "step": 242290 }, { "epoch": 4.253937042434032, "grad_norm": 0.045163574419054296, "learning_rate": 1.3111882765677851e-05, "loss": 0.8337, "step": 242300 }, { "epoch": 4.254112607314033, "grad_norm": 0.05830146330711567, "learning_rate": 1.3106753807965161e-05, "loss": 0.8423, "step": 242310 }, { "epoch": 4.254288172194034, "grad_norm": 0.060918598390278744, "learning_rate": 1.3101625963876726e-05, "loss": 0.8372, "step": 242320 }, { "epoch": 4.254463737074036, "grad_norm": 0.045268733816303384, "learning_rate": 1.3096499233477525e-05, "loss": 0.8269, "step": 242330 }, { "epoch": 4.254639301954037, "grad_norm": 0.06593371227131385, "learning_rate": 1.3091373616832558e-05, "loss": 0.8429, "step": 242340 }, { "epoch": 4.254814866834039, "grad_norm": 0.05964018144875091, "learning_rate": 1.308624911400671e-05, "loss": 0.8278, "step": 242350 }, { "epoch": 4.25499043171404, "grad_norm": 0.0486012769594161, "learning_rate": 1.3081125725064939e-05, "loss": 0.8366, "step": 242360 }, { "epoch": 4.255165996594041, "grad_norm": 0.04942617462369631, "learning_rate": 1.3076003450072142e-05, "loss": 0.8373, "step": 242370 }, { "epoch": 4.255341561474043, "grad_norm": 0.0483091695798849, "learning_rate": 1.307088228909326e-05, "loss": 0.8381, "step": 242380 }, { "epoch": 4.255517126354044, "grad_norm": 0.05307991426135474, "learning_rate": 1.306576224219313e-05, "loss": 0.8402, "step": 242390 }, { "epoch": 4.255692691234046, "grad_norm": 0.05357365413304437, "learning_rate": 1.3060643309436642e-05, "loss": 0.8316, "step": 242400 }, { "epoch": 4.255868256114047, "grad_norm": 0.05124660657985264, "learning_rate": 1.3055525490888679e-05, "loss": 0.8425, "step": 242410 }, { "epoch": 4.256043820994049, "grad_norm": 0.05397031066064715, "learning_rate": 1.3050408786614046e-05, "loss": 0.8447, "step": 242420 }, { "epoch": 4.25621938587405, "grad_norm": 0.05336289546046423, "learning_rate": 1.3045293196677589e-05, "loss": 0.8349, "step": 242430 }, { "epoch": 4.256394950754051, "grad_norm": 0.053558733494403606, "learning_rate": 1.3040178721144145e-05, "loss": 0.8377, "step": 242440 }, { "epoch": 4.256570515634053, "grad_norm": 0.052784358740563704, "learning_rate": 1.3035065360078465e-05, "loss": 0.8429, "step": 242450 }, { "epoch": 4.256746080514054, "grad_norm": 0.05197527841371333, "learning_rate": 1.3029953113545395e-05, "loss": 0.837, "step": 242460 }, { "epoch": 4.256921645394056, "grad_norm": 0.05475921408677017, "learning_rate": 1.3024841981609632e-05, "loss": 0.8428, "step": 242470 }, { "epoch": 4.257097210274057, "grad_norm": 0.04625936133120867, "learning_rate": 1.3019731964336021e-05, "loss": 0.8381, "step": 242480 }, { "epoch": 4.257272775154058, "grad_norm": 0.04777383582683556, "learning_rate": 1.301462306178925e-05, "loss": 0.8405, "step": 242490 }, { "epoch": 4.25744834003406, "grad_norm": 0.05550459479838766, "learning_rate": 1.3009515274034062e-05, "loss": 0.8475, "step": 242500 }, { "epoch": 4.257623904914061, "grad_norm": 0.05764701501427514, "learning_rate": 1.3004408601135209e-05, "loss": 0.8445, "step": 242510 }, { "epoch": 4.257799469794063, "grad_norm": 0.05406282735311401, "learning_rate": 1.2999303043157332e-05, "loss": 0.8435, "step": 242520 }, { "epoch": 4.257975034674064, "grad_norm": 0.05034356781407459, "learning_rate": 1.2994198600165159e-05, "loss": 0.8395, "step": 242530 }, { "epoch": 4.258150599554066, "grad_norm": 0.05185165816759088, "learning_rate": 1.2989095272223355e-05, "loss": 0.8384, "step": 242540 }, { "epoch": 4.258326164434067, "grad_norm": 0.05187329814364592, "learning_rate": 1.2983993059396602e-05, "loss": 0.8363, "step": 242550 }, { "epoch": 4.258501729314068, "grad_norm": 0.05105091941912696, "learning_rate": 1.2978891961749498e-05, "loss": 0.8421, "step": 242560 }, { "epoch": 4.25867729419407, "grad_norm": 0.04412348172468183, "learning_rate": 1.2973791979346708e-05, "loss": 0.837, "step": 242570 }, { "epoch": 4.258852859074071, "grad_norm": 0.07642861086201984, "learning_rate": 1.2968693112252861e-05, "loss": 0.8389, "step": 242580 }, { "epoch": 4.2590284239540726, "grad_norm": 0.057771429975473465, "learning_rate": 1.2963595360532532e-05, "loss": 0.8445, "step": 242590 }, { "epoch": 4.259203988834074, "grad_norm": 0.061122447140581716, "learning_rate": 1.2958498724250312e-05, "loss": 0.8361, "step": 242600 }, { "epoch": 4.259379553714075, "grad_norm": 0.051610133860869455, "learning_rate": 1.2953403203470807e-05, "loss": 0.8446, "step": 242610 }, { "epoch": 4.2595551185940765, "grad_norm": 0.04637698986518553, "learning_rate": 1.294830879825854e-05, "loss": 0.8392, "step": 242620 }, { "epoch": 4.259730683474078, "grad_norm": 0.04508035272149031, "learning_rate": 1.2943215508678094e-05, "loss": 0.8341, "step": 242630 }, { "epoch": 4.2599062483540795, "grad_norm": 0.06369010037538243, "learning_rate": 1.293812333479395e-05, "loss": 0.8351, "step": 242640 }, { "epoch": 4.2600818132340805, "grad_norm": 0.05047318521902229, "learning_rate": 1.2933032276670699e-05, "loss": 0.8342, "step": 242650 }, { "epoch": 4.2602573781140824, "grad_norm": 0.057972212119803496, "learning_rate": 1.2927942334372778e-05, "loss": 0.8375, "step": 242660 }, { "epoch": 4.2604329429940835, "grad_norm": 0.04732270922310417, "learning_rate": 1.2922853507964716e-05, "loss": 0.839, "step": 242670 }, { "epoch": 4.2606085078740845, "grad_norm": 0.047418389798708604, "learning_rate": 1.2917765797510991e-05, "loss": 0.8278, "step": 242680 }, { "epoch": 4.260784072754086, "grad_norm": 0.04654095024428527, "learning_rate": 1.2912679203076042e-05, "loss": 0.8323, "step": 242690 }, { "epoch": 4.2609596376340875, "grad_norm": 0.05229678445684744, "learning_rate": 1.2907593724724321e-05, "loss": 0.8401, "step": 242700 }, { "epoch": 4.261135202514089, "grad_norm": 0.06505310922639307, "learning_rate": 1.2902509362520283e-05, "loss": 0.8392, "step": 242710 }, { "epoch": 4.26131076739409, "grad_norm": 0.0664282002661711, "learning_rate": 1.2897426116528344e-05, "loss": 0.836, "step": 242720 }, { "epoch": 4.261486332274092, "grad_norm": 0.04643481174810188, "learning_rate": 1.289234398681288e-05, "loss": 0.8479, "step": 242730 }, { "epoch": 4.261661897154093, "grad_norm": 0.05652757230192746, "learning_rate": 1.2887262973438314e-05, "loss": 0.845, "step": 242740 }, { "epoch": 4.261837462034094, "grad_norm": 0.05041272405530305, "learning_rate": 1.288218307646902e-05, "loss": 0.8367, "step": 242750 }, { "epoch": 4.262013026914096, "grad_norm": 0.05194966438662301, "learning_rate": 1.2877104295969348e-05, "loss": 0.8342, "step": 242760 }, { "epoch": 4.262188591794097, "grad_norm": 0.04653447164401356, "learning_rate": 1.2872026632003645e-05, "loss": 0.8425, "step": 242770 }, { "epoch": 4.262364156674099, "grad_norm": 0.05005771931292351, "learning_rate": 1.2866950084636273e-05, "loss": 0.8423, "step": 242780 }, { "epoch": 4.2625397215541, "grad_norm": 0.06754659458606181, "learning_rate": 1.2861874653931521e-05, "loss": 0.8391, "step": 242790 }, { "epoch": 4.262715286434101, "grad_norm": 0.04833763500340026, "learning_rate": 1.2856800339953721e-05, "loss": 0.8376, "step": 242800 }, { "epoch": 4.262890851314103, "grad_norm": 0.052926923892620534, "learning_rate": 1.2851727142767115e-05, "loss": 0.8392, "step": 242810 }, { "epoch": 4.263066416194104, "grad_norm": 0.05682119001807977, "learning_rate": 1.2846655062436066e-05, "loss": 0.8405, "step": 242820 }, { "epoch": 4.263241981074106, "grad_norm": 0.06545723221609097, "learning_rate": 1.2841584099024767e-05, "loss": 0.8388, "step": 242830 }, { "epoch": 4.263417545954107, "grad_norm": 0.06430139432264231, "learning_rate": 1.283651425259749e-05, "loss": 0.8469, "step": 242840 }, { "epoch": 4.263593110834108, "grad_norm": 0.0626513494107096, "learning_rate": 1.2831445523218494e-05, "loss": 0.8376, "step": 242850 }, { "epoch": 4.26376867571411, "grad_norm": 0.060045696033337745, "learning_rate": 1.2826377910951964e-05, "loss": 0.8337, "step": 242860 }, { "epoch": 4.263944240594111, "grad_norm": 0.05243271989026457, "learning_rate": 1.2821311415862125e-05, "loss": 0.8384, "step": 242870 }, { "epoch": 4.264119805474113, "grad_norm": 0.07299048044256033, "learning_rate": 1.2816246038013172e-05, "loss": 0.8425, "step": 242880 }, { "epoch": 4.264295370354114, "grad_norm": 0.054988534903137425, "learning_rate": 1.2811181777469299e-05, "loss": 0.8357, "step": 242890 }, { "epoch": 4.264470935234116, "grad_norm": 0.06476739278926044, "learning_rate": 1.2806118634294626e-05, "loss": 0.8349, "step": 242900 }, { "epoch": 4.264646500114117, "grad_norm": 0.05215845308270696, "learning_rate": 1.2801056608553341e-05, "loss": 0.8469, "step": 242910 }, { "epoch": 4.264822064994118, "grad_norm": 0.046057595320923767, "learning_rate": 1.2795995700309587e-05, "loss": 0.8466, "step": 242920 }, { "epoch": 4.26499762987412, "grad_norm": 0.04923370350567672, "learning_rate": 1.2790935909627461e-05, "loss": 0.8379, "step": 242930 }, { "epoch": 4.265173194754121, "grad_norm": 0.05832881166686537, "learning_rate": 1.2785877236571079e-05, "loss": 0.8364, "step": 242940 }, { "epoch": 4.265348759634123, "grad_norm": 0.047161344442871074, "learning_rate": 1.2780819681204554e-05, "loss": 0.8349, "step": 242950 }, { "epoch": 4.265524324514124, "grad_norm": 0.05273534510922233, "learning_rate": 1.2775763243591932e-05, "loss": 0.8399, "step": 242960 }, { "epoch": 4.265699889394126, "grad_norm": 0.08333769304411634, "learning_rate": 1.2770707923797326e-05, "loss": 0.8411, "step": 242970 }, { "epoch": 4.265875454274127, "grad_norm": 0.05628447166174564, "learning_rate": 1.2765653721884704e-05, "loss": 0.8405, "step": 242980 }, { "epoch": 4.266051019154128, "grad_norm": 0.08744901385820075, "learning_rate": 1.2760600637918213e-05, "loss": 0.8375, "step": 242990 }, { "epoch": 4.26622658403413, "grad_norm": 0.054992318144549045, "learning_rate": 1.2755548671961794e-05, "loss": 0.8359, "step": 243000 }, { "epoch": 4.266402148914131, "grad_norm": 0.04011887718502325, "learning_rate": 1.2750497824079496e-05, "loss": 0.8359, "step": 243010 }, { "epoch": 4.266577713794133, "grad_norm": 0.057137715415160495, "learning_rate": 1.2745448094335313e-05, "loss": 0.8458, "step": 243020 }, { "epoch": 4.266753278674134, "grad_norm": 0.046452995244857284, "learning_rate": 1.27403994827932e-05, "loss": 0.8351, "step": 243030 }, { "epoch": 4.266928843554135, "grad_norm": 0.05181529693900153, "learning_rate": 1.2735351989517144e-05, "loss": 0.8382, "step": 243040 }, { "epoch": 4.267104408434137, "grad_norm": 0.0492889891166414, "learning_rate": 1.2730305614571088e-05, "loss": 0.8402, "step": 243050 }, { "epoch": 4.267279973314138, "grad_norm": 0.0566121258382044, "learning_rate": 1.2725260358018994e-05, "loss": 0.8419, "step": 243060 }, { "epoch": 4.26745553819414, "grad_norm": 0.0566407722717939, "learning_rate": 1.2720216219924764e-05, "loss": 0.8398, "step": 243070 }, { "epoch": 4.267631103074141, "grad_norm": 0.044445557952637806, "learning_rate": 1.2715173200352299e-05, "loss": 0.8414, "step": 243080 }, { "epoch": 4.267806667954142, "grad_norm": 0.05582752629034151, "learning_rate": 1.2710131299365532e-05, "loss": 0.8375, "step": 243090 }, { "epoch": 4.267982232834144, "grad_norm": 0.04707676102808554, "learning_rate": 1.2705090517028309e-05, "loss": 0.8342, "step": 243100 }, { "epoch": 4.268157797714145, "grad_norm": 0.05801833245749176, "learning_rate": 1.2700050853404499e-05, "loss": 0.8346, "step": 243110 }, { "epoch": 4.268333362594147, "grad_norm": 0.04425973051190119, "learning_rate": 1.2695012308557996e-05, "loss": 0.8419, "step": 243120 }, { "epoch": 4.268508927474148, "grad_norm": 0.06062811152420002, "learning_rate": 1.268997488255259e-05, "loss": 0.8387, "step": 243130 }, { "epoch": 4.26868449235415, "grad_norm": 0.04953693403392331, "learning_rate": 1.2684938575452146e-05, "loss": 0.8406, "step": 243140 }, { "epoch": 4.268860057234151, "grad_norm": 0.04941619131262898, "learning_rate": 1.2679903387320417e-05, "loss": 0.8455, "step": 243150 }, { "epoch": 4.269035622114152, "grad_norm": 0.05662076719839458, "learning_rate": 1.2674869318221274e-05, "loss": 0.84, "step": 243160 }, { "epoch": 4.269211186994154, "grad_norm": 0.051611473473385995, "learning_rate": 1.2669836368218446e-05, "loss": 0.8368, "step": 243170 }, { "epoch": 4.269386751874155, "grad_norm": 0.049678377370191276, "learning_rate": 1.2664804537375727e-05, "loss": 0.8385, "step": 243180 }, { "epoch": 4.269562316754157, "grad_norm": 0.0497058090007426, "learning_rate": 1.2659773825756877e-05, "loss": 0.8391, "step": 243190 }, { "epoch": 4.269737881634158, "grad_norm": 0.05156175076359754, "learning_rate": 1.2654744233425609e-05, "loss": 0.8367, "step": 243200 }, { "epoch": 4.26991344651416, "grad_norm": 0.08325740255104307, "learning_rate": 1.264971576044565e-05, "loss": 0.8399, "step": 243210 }, { "epoch": 4.270089011394161, "grad_norm": 0.08941909449304974, "learning_rate": 1.2644688406880731e-05, "loss": 0.8348, "step": 243220 }, { "epoch": 4.270264576274162, "grad_norm": 0.04486953615558507, "learning_rate": 1.2639662172794563e-05, "loss": 0.8398, "step": 243230 }, { "epoch": 4.270440141154164, "grad_norm": 0.06703572065971837, "learning_rate": 1.263463705825079e-05, "loss": 0.8422, "step": 243240 }, { "epoch": 4.270615706034165, "grad_norm": 0.07180279406068314, "learning_rate": 1.2629613063313103e-05, "loss": 0.8405, "step": 243250 }, { "epoch": 4.270791270914167, "grad_norm": 0.04764637589725297, "learning_rate": 1.2624590188045177e-05, "loss": 0.8435, "step": 243260 }, { "epoch": 4.270966835794168, "grad_norm": 0.04983073441931259, "learning_rate": 1.2619568432510603e-05, "loss": 0.8436, "step": 243270 }, { "epoch": 4.271142400674169, "grad_norm": 0.052205973676732007, "learning_rate": 1.2614547796773044e-05, "loss": 0.8321, "step": 243280 }, { "epoch": 4.271317965554171, "grad_norm": 0.06340742876882399, "learning_rate": 1.2609528280896107e-05, "loss": 0.8399, "step": 243290 }, { "epoch": 4.271493530434172, "grad_norm": 0.048725781619105374, "learning_rate": 1.2604509884943386e-05, "loss": 0.8325, "step": 243300 }, { "epoch": 4.271669095314174, "grad_norm": 0.05042177939892984, "learning_rate": 1.2599492608978478e-05, "loss": 0.8362, "step": 243310 }, { "epoch": 4.271844660194175, "grad_norm": 0.04866308406717237, "learning_rate": 1.2594476453064897e-05, "loss": 0.8361, "step": 243320 }, { "epoch": 4.272020225074177, "grad_norm": 0.054357421457897126, "learning_rate": 1.2589461417266286e-05, "loss": 0.8331, "step": 243330 }, { "epoch": 4.272195789954178, "grad_norm": 0.06580800503690988, "learning_rate": 1.2584447501646118e-05, "loss": 0.8388, "step": 243340 }, { "epoch": 4.272371354834179, "grad_norm": 0.052975265761652734, "learning_rate": 1.2579434706267945e-05, "loss": 0.84, "step": 243350 }, { "epoch": 4.2725469197141805, "grad_norm": 0.056365970003588145, "learning_rate": 1.2574423031195304e-05, "loss": 0.842, "step": 243360 }, { "epoch": 4.272722484594182, "grad_norm": 0.06056484754443047, "learning_rate": 1.2569412476491652e-05, "loss": 0.8351, "step": 243370 }, { "epoch": 4.2728980494741835, "grad_norm": 0.05079591387074029, "learning_rate": 1.2564403042220494e-05, "loss": 0.8393, "step": 243380 }, { "epoch": 4.2730736143541845, "grad_norm": 0.07508006807313614, "learning_rate": 1.255939472844529e-05, "loss": 0.8348, "step": 243390 }, { "epoch": 4.2732491792341865, "grad_norm": 0.05153144173683568, "learning_rate": 1.255438753522952e-05, "loss": 0.8299, "step": 243400 }, { "epoch": 4.2734247441141875, "grad_norm": 0.07374664230872349, "learning_rate": 1.25493814626366e-05, "loss": 0.8422, "step": 243410 }, { "epoch": 4.2736003089941885, "grad_norm": 0.04567364423263618, "learning_rate": 1.2544376510729972e-05, "loss": 0.8325, "step": 243420 }, { "epoch": 4.2737758738741904, "grad_norm": 0.058243286435696956, "learning_rate": 1.2539372679573052e-05, "loss": 0.8418, "step": 243430 }, { "epoch": 4.2739514387541915, "grad_norm": 0.04998344615254459, "learning_rate": 1.2534369969229224e-05, "loss": 0.8434, "step": 243440 }, { "epoch": 4.274127003634193, "grad_norm": 0.0641621688812465, "learning_rate": 1.2529368379761874e-05, "loss": 0.842, "step": 243450 }, { "epoch": 4.274302568514194, "grad_norm": 0.04724240979184091, "learning_rate": 1.2524367911234409e-05, "loss": 0.8343, "step": 243460 }, { "epoch": 4.2744781333941955, "grad_norm": 0.046323521757375315, "learning_rate": 1.2519368563710143e-05, "loss": 0.8485, "step": 243470 }, { "epoch": 4.274653698274197, "grad_norm": 0.042076019244133525, "learning_rate": 1.2514370337252454e-05, "loss": 0.8378, "step": 243480 }, { "epoch": 4.274829263154198, "grad_norm": 0.060162432442483815, "learning_rate": 1.2509373231924606e-05, "loss": 0.8379, "step": 243490 }, { "epoch": 4.2750048280342, "grad_norm": 0.044117710586214755, "learning_rate": 1.250437724779e-05, "loss": 0.8377, "step": 243500 }, { "epoch": 4.275180392914201, "grad_norm": 0.045737364504768126, "learning_rate": 1.2499382384911865e-05, "loss": 0.8426, "step": 243510 }, { "epoch": 4.275355957794202, "grad_norm": 0.04617984633480924, "learning_rate": 1.249438864335353e-05, "loss": 0.8416, "step": 243520 }, { "epoch": 4.275531522674204, "grad_norm": 0.06145220714159467, "learning_rate": 1.2489396023178266e-05, "loss": 0.8352, "step": 243530 }, { "epoch": 4.275707087554205, "grad_norm": 0.04939218493859776, "learning_rate": 1.2484404524449304e-05, "loss": 0.8385, "step": 243540 }, { "epoch": 4.275882652434207, "grad_norm": 0.05160087658782202, "learning_rate": 1.2479414147229896e-05, "loss": 0.8474, "step": 243550 }, { "epoch": 4.276058217314208, "grad_norm": 0.04938219240322841, "learning_rate": 1.2474424891583273e-05, "loss": 0.8375, "step": 243560 }, { "epoch": 4.27623378219421, "grad_norm": 0.06766591135845429, "learning_rate": 1.2469436757572673e-05, "loss": 0.8455, "step": 243570 }, { "epoch": 4.276409347074211, "grad_norm": 0.06488732805771705, "learning_rate": 1.2464449745261259e-05, "loss": 0.832, "step": 243580 }, { "epoch": 4.276584911954212, "grad_norm": 0.0474081851827153, "learning_rate": 1.2459463854712231e-05, "loss": 0.8365, "step": 243590 }, { "epoch": 4.276760476834214, "grad_norm": 0.05213510029850534, "learning_rate": 1.2454479085988787e-05, "loss": 0.8288, "step": 243600 }, { "epoch": 4.276936041714215, "grad_norm": 0.050651299567227394, "learning_rate": 1.2449495439154041e-05, "loss": 0.8401, "step": 243610 }, { "epoch": 4.277111606594217, "grad_norm": 0.04915316261878375, "learning_rate": 1.2444512914271158e-05, "loss": 0.8472, "step": 243620 }, { "epoch": 4.277287171474218, "grad_norm": 0.050877953736880255, "learning_rate": 1.2439531511403286e-05, "loss": 0.8357, "step": 243630 }, { "epoch": 4.27746273635422, "grad_norm": 0.08391780105072462, "learning_rate": 1.243455123061351e-05, "loss": 0.8362, "step": 243640 }, { "epoch": 4.277638301234221, "grad_norm": 0.05135827770875194, "learning_rate": 1.2429572071964962e-05, "loss": 0.8383, "step": 243650 }, { "epoch": 4.277813866114222, "grad_norm": 0.05407052655003994, "learning_rate": 1.242459403552067e-05, "loss": 0.8455, "step": 243660 }, { "epoch": 4.277989430994224, "grad_norm": 0.048508402678191564, "learning_rate": 1.2419617121343777e-05, "loss": 0.8476, "step": 243670 }, { "epoch": 4.278164995874225, "grad_norm": 0.054196432852187906, "learning_rate": 1.2414641329497306e-05, "loss": 0.8365, "step": 243680 }, { "epoch": 4.278340560754227, "grad_norm": 0.05496555080497052, "learning_rate": 1.2409666660044294e-05, "loss": 0.8339, "step": 243690 }, { "epoch": 4.278516125634228, "grad_norm": 0.04270982395789623, "learning_rate": 1.2404693113047808e-05, "loss": 0.8401, "step": 243700 }, { "epoch": 4.278691690514229, "grad_norm": 0.03883944820608379, "learning_rate": 1.2399720688570824e-05, "loss": 0.8365, "step": 243710 }, { "epoch": 4.278867255394231, "grad_norm": 0.04854759632841802, "learning_rate": 1.2394749386676355e-05, "loss": 0.8411, "step": 243720 }, { "epoch": 4.279042820274232, "grad_norm": 0.05427797683607138, "learning_rate": 1.2389779207427397e-05, "loss": 0.8332, "step": 243730 }, { "epoch": 4.279218385154234, "grad_norm": 0.0472257753493364, "learning_rate": 1.2384810150886935e-05, "loss": 0.8361, "step": 243740 }, { "epoch": 4.279393950034235, "grad_norm": 0.050253822948359325, "learning_rate": 1.2379842217117888e-05, "loss": 0.8388, "step": 243750 }, { "epoch": 4.279569514914236, "grad_norm": 0.06059474493561488, "learning_rate": 1.2374875406183229e-05, "loss": 0.8394, "step": 243760 }, { "epoch": 4.279745079794238, "grad_norm": 0.05263274809284093, "learning_rate": 1.2369909718145898e-05, "loss": 0.8362, "step": 243770 }, { "epoch": 4.279920644674239, "grad_norm": 0.05525979734134049, "learning_rate": 1.2364945153068785e-05, "loss": 0.8417, "step": 243780 }, { "epoch": 4.280096209554241, "grad_norm": 0.056888199345068974, "learning_rate": 1.2359981711014796e-05, "loss": 0.8445, "step": 243790 }, { "epoch": 4.280271774434242, "grad_norm": 0.046249360809967645, "learning_rate": 1.2355019392046851e-05, "loss": 0.8387, "step": 243800 }, { "epoch": 4.280447339314244, "grad_norm": 0.05735332603008036, "learning_rate": 1.2350058196227778e-05, "loss": 0.8363, "step": 243810 }, { "epoch": 4.280622904194245, "grad_norm": 0.0645610975812796, "learning_rate": 1.2345098123620482e-05, "loss": 0.8318, "step": 243820 }, { "epoch": 4.280798469074246, "grad_norm": 0.040216104688058994, "learning_rate": 1.2340139174287741e-05, "loss": 0.8386, "step": 243830 }, { "epoch": 4.280974033954248, "grad_norm": 0.06042383455798125, "learning_rate": 1.2335181348292457e-05, "loss": 0.8401, "step": 243840 }, { "epoch": 4.281149598834249, "grad_norm": 0.04745865846991535, "learning_rate": 1.2330224645697405e-05, "loss": 0.8469, "step": 243850 }, { "epoch": 4.281325163714251, "grad_norm": 0.048553339624766295, "learning_rate": 1.232526906656541e-05, "loss": 0.8404, "step": 243860 }, { "epoch": 4.281500728594252, "grad_norm": 0.05233218673761235, "learning_rate": 1.232031461095926e-05, "loss": 0.8424, "step": 243870 }, { "epoch": 4.281676293474254, "grad_norm": 0.05740765680617244, "learning_rate": 1.2315361278941698e-05, "loss": 0.8346, "step": 243880 }, { "epoch": 4.281851858354255, "grad_norm": 0.05031566371650933, "learning_rate": 1.2310409070575515e-05, "loss": 0.8344, "step": 243890 }, { "epoch": 4.282027423234256, "grad_norm": 0.05562503482460219, "learning_rate": 1.2305457985923438e-05, "loss": 0.833, "step": 243900 }, { "epoch": 4.282202988114258, "grad_norm": 0.04205336889276104, "learning_rate": 1.2300508025048223e-05, "loss": 0.8484, "step": 243910 }, { "epoch": 4.282378552994259, "grad_norm": 0.06153711526015306, "learning_rate": 1.2295559188012562e-05, "loss": 0.8405, "step": 243920 }, { "epoch": 4.282554117874261, "grad_norm": 0.049053204596296016, "learning_rate": 1.2290611474879163e-05, "loss": 0.8426, "step": 243930 }, { "epoch": 4.282729682754262, "grad_norm": 0.044307363271849975, "learning_rate": 1.2285664885710731e-05, "loss": 0.8357, "step": 243940 }, { "epoch": 4.282905247634263, "grad_norm": 0.041510616772437066, "learning_rate": 1.2280719420569908e-05, "loss": 0.8403, "step": 243950 }, { "epoch": 4.283080812514265, "grad_norm": 0.058839105276302844, "learning_rate": 1.2275775079519377e-05, "loss": 0.8404, "step": 243960 }, { "epoch": 4.283256377394266, "grad_norm": 0.06109807562893681, "learning_rate": 1.2270831862621798e-05, "loss": 0.8315, "step": 243970 }, { "epoch": 4.283431942274268, "grad_norm": 0.061233678325721776, "learning_rate": 1.2265889769939765e-05, "loss": 0.8326, "step": 243980 }, { "epoch": 4.283607507154269, "grad_norm": 0.057797339204325934, "learning_rate": 1.226094880153594e-05, "loss": 0.8335, "step": 243990 }, { "epoch": 4.283783072034271, "grad_norm": 0.05876496182166458, "learning_rate": 1.225600895747285e-05, "loss": 0.8392, "step": 244000 }, { "epoch": 4.283958636914272, "grad_norm": 0.07178437421614126, "learning_rate": 1.225107023781318e-05, "loss": 0.8403, "step": 244010 }, { "epoch": 4.284134201794273, "grad_norm": 0.055981957244261145, "learning_rate": 1.2246132642619437e-05, "loss": 0.8405, "step": 244020 }, { "epoch": 4.284309766674275, "grad_norm": 0.049854149502988276, "learning_rate": 1.2241196171954205e-05, "loss": 0.8437, "step": 244030 }, { "epoch": 4.284485331554276, "grad_norm": 0.05919760452852691, "learning_rate": 1.2236260825880045e-05, "loss": 0.8396, "step": 244040 }, { "epoch": 4.284660896434278, "grad_norm": 0.049802380117870006, "learning_rate": 1.223132660445945e-05, "loss": 0.8427, "step": 244050 }, { "epoch": 4.284836461314279, "grad_norm": 0.05328056722470482, "learning_rate": 1.2226393507754969e-05, "loss": 0.8354, "step": 244060 }, { "epoch": 4.285012026194281, "grad_norm": 0.047962797685726914, "learning_rate": 1.2221461535829087e-05, "loss": 0.8339, "step": 244070 }, { "epoch": 4.285187591074282, "grad_norm": 0.05770526882285774, "learning_rate": 1.2216530688744319e-05, "loss": 0.833, "step": 244080 }, { "epoch": 4.285363155954283, "grad_norm": 0.06018105635588434, "learning_rate": 1.2211600966563113e-05, "loss": 0.8365, "step": 244090 }, { "epoch": 4.2855387208342846, "grad_norm": 0.04529728553208439, "learning_rate": 1.2206672369347935e-05, "loss": 0.8344, "step": 244100 }, { "epoch": 4.285714285714286, "grad_norm": 0.042049017873484605, "learning_rate": 1.2201744897161244e-05, "loss": 0.8509, "step": 244110 }, { "epoch": 4.2858898505942875, "grad_norm": 0.048939733438797664, "learning_rate": 1.2196818550065463e-05, "loss": 0.8404, "step": 244120 }, { "epoch": 4.2860654154742885, "grad_norm": 0.04281931902607773, "learning_rate": 1.2191893328123e-05, "loss": 0.8356, "step": 244130 }, { "epoch": 4.28624098035429, "grad_norm": 0.05186885760579349, "learning_rate": 1.2186969231396281e-05, "loss": 0.8378, "step": 244140 }, { "epoch": 4.2864165452342915, "grad_norm": 0.06022206209170929, "learning_rate": 1.2182046259947675e-05, "loss": 0.8308, "step": 244150 }, { "epoch": 4.2865921101142925, "grad_norm": 0.06510160525969833, "learning_rate": 1.2177124413839574e-05, "loss": 0.8472, "step": 244160 }, { "epoch": 4.2867676749942945, "grad_norm": 0.06106186038622757, "learning_rate": 1.2172203693134301e-05, "loss": 0.8378, "step": 244170 }, { "epoch": 4.2869432398742955, "grad_norm": 0.04491228226120275, "learning_rate": 1.2167284097894267e-05, "loss": 0.8419, "step": 244180 }, { "epoch": 4.2871188047542965, "grad_norm": 0.047031480994144344, "learning_rate": 1.2162365628181743e-05, "loss": 0.8391, "step": 244190 }, { "epoch": 4.287294369634298, "grad_norm": 0.07452425075593842, "learning_rate": 1.2157448284059073e-05, "loss": 0.8356, "step": 244200 }, { "epoch": 4.2874699345142995, "grad_norm": 0.047679999457830434, "learning_rate": 1.215253206558859e-05, "loss": 0.8399, "step": 244210 }, { "epoch": 4.287645499394301, "grad_norm": 0.042947432672802856, "learning_rate": 1.2147616972832523e-05, "loss": 0.8356, "step": 244220 }, { "epoch": 4.287821064274302, "grad_norm": 0.047582918424767126, "learning_rate": 1.2142703005853184e-05, "loss": 0.8351, "step": 244230 }, { "epoch": 4.287996629154304, "grad_norm": 0.05247348038271087, "learning_rate": 1.2137790164712814e-05, "loss": 0.8357, "step": 244240 }, { "epoch": 4.288172194034305, "grad_norm": 0.04751592854969255, "learning_rate": 1.2132878449473704e-05, "loss": 0.8339, "step": 244250 }, { "epoch": 4.288347758914306, "grad_norm": 0.04389219572185419, "learning_rate": 1.2127967860198036e-05, "loss": 0.8383, "step": 244260 }, { "epoch": 4.288523323794308, "grad_norm": 0.05636238057894353, "learning_rate": 1.2123058396948048e-05, "loss": 0.8441, "step": 244270 }, { "epoch": 4.288698888674309, "grad_norm": 0.06381700937898929, "learning_rate": 1.2118150059785957e-05, "loss": 0.8402, "step": 244280 }, { "epoch": 4.288874453554311, "grad_norm": 0.08597102125226226, "learning_rate": 1.2113242848773923e-05, "loss": 0.8361, "step": 244290 }, { "epoch": 4.289050018434312, "grad_norm": 0.05059543778871967, "learning_rate": 1.210833676397414e-05, "loss": 0.8379, "step": 244300 }, { "epoch": 4.289225583314314, "grad_norm": 0.06561227426414713, "learning_rate": 1.2103431805448778e-05, "loss": 0.8375, "step": 244310 }, { "epoch": 4.289401148194315, "grad_norm": 0.04445657732310863, "learning_rate": 1.2098527973259957e-05, "loss": 0.8408, "step": 244320 }, { "epoch": 4.289576713074316, "grad_norm": 0.058941198482326776, "learning_rate": 1.2093625267469846e-05, "loss": 0.8367, "step": 244330 }, { "epoch": 4.289752277954318, "grad_norm": 0.07700066389434689, "learning_rate": 1.20887236881405e-05, "loss": 0.8451, "step": 244340 }, { "epoch": 4.289927842834319, "grad_norm": 0.04336136946572611, "learning_rate": 1.2083823235334105e-05, "loss": 0.8421, "step": 244350 }, { "epoch": 4.290103407714321, "grad_norm": 0.04683432933984994, "learning_rate": 1.2078923909112697e-05, "loss": 0.8389, "step": 244360 }, { "epoch": 4.290278972594322, "grad_norm": 0.04870168248029764, "learning_rate": 1.2074025709538356e-05, "loss": 0.8343, "step": 244370 }, { "epoch": 4.290454537474323, "grad_norm": 0.05420940425705515, "learning_rate": 1.2069128636673168e-05, "loss": 0.8414, "step": 244380 }, { "epoch": 4.290630102354325, "grad_norm": 0.04779517429713064, "learning_rate": 1.206423269057915e-05, "loss": 0.8427, "step": 244390 }, { "epoch": 4.290805667234326, "grad_norm": 0.050010359086229354, "learning_rate": 1.2059337871318355e-05, "loss": 0.8403, "step": 244400 }, { "epoch": 4.290981232114328, "grad_norm": 0.047812748664901476, "learning_rate": 1.2054444178952784e-05, "loss": 0.8436, "step": 244410 }, { "epoch": 4.291156796994329, "grad_norm": 0.04486403045550754, "learning_rate": 1.2049551613544471e-05, "loss": 0.8382, "step": 244420 }, { "epoch": 4.29133236187433, "grad_norm": 0.044516154820135406, "learning_rate": 1.2044660175155367e-05, "loss": 0.8359, "step": 244430 }, { "epoch": 4.291507926754332, "grad_norm": 0.04461820890790119, "learning_rate": 1.2039769863847481e-05, "loss": 0.8456, "step": 244440 }, { "epoch": 4.291683491634333, "grad_norm": 0.05209887859408612, "learning_rate": 1.2034880679682758e-05, "loss": 0.8317, "step": 244450 }, { "epoch": 4.291859056514335, "grad_norm": 0.08843145846260526, "learning_rate": 1.2029992622723137e-05, "loss": 0.8337, "step": 244460 }, { "epoch": 4.292034621394336, "grad_norm": 0.059214530072757496, "learning_rate": 1.2025105693030563e-05, "loss": 0.839, "step": 244470 }, { "epoch": 4.292210186274338, "grad_norm": 0.04979769824776254, "learning_rate": 1.202021989066697e-05, "loss": 0.8413, "step": 244480 }, { "epoch": 4.292385751154339, "grad_norm": 0.042705671856737605, "learning_rate": 1.2015335215694229e-05, "loss": 0.8427, "step": 244490 }, { "epoch": 4.29256131603434, "grad_norm": 0.053710309926316424, "learning_rate": 1.2010451668174255e-05, "loss": 0.8303, "step": 244500 }, { "epoch": 4.292736880914342, "grad_norm": 0.048456267184408264, "learning_rate": 1.2005569248168879e-05, "loss": 0.8405, "step": 244510 }, { "epoch": 4.292912445794343, "grad_norm": 0.05784210572377025, "learning_rate": 1.2000687955740042e-05, "loss": 0.8368, "step": 244520 }, { "epoch": 4.293088010674345, "grad_norm": 0.05336795441289424, "learning_rate": 1.1995807790949516e-05, "loss": 0.8376, "step": 244530 }, { "epoch": 4.293263575554346, "grad_norm": 0.05726096718251255, "learning_rate": 1.1990928753859163e-05, "loss": 0.8418, "step": 244540 }, { "epoch": 4.293439140434348, "grad_norm": 0.06431257811598978, "learning_rate": 1.1986050844530824e-05, "loss": 0.8424, "step": 244550 }, { "epoch": 4.293614705314349, "grad_norm": 0.07161094238151122, "learning_rate": 1.198117406302626e-05, "loss": 0.8397, "step": 244560 }, { "epoch": 4.29379027019435, "grad_norm": 0.06980730206888551, "learning_rate": 1.1976298409407284e-05, "loss": 0.8425, "step": 244570 }, { "epoch": 4.293965835074352, "grad_norm": 0.05341142964196267, "learning_rate": 1.1971423883735664e-05, "loss": 0.8345, "step": 244580 }, { "epoch": 4.294141399954353, "grad_norm": 0.05739001195714825, "learning_rate": 1.1966550486073185e-05, "loss": 0.8368, "step": 244590 }, { "epoch": 4.294316964834355, "grad_norm": 0.05161584704525461, "learning_rate": 1.1961678216481551e-05, "loss": 0.8393, "step": 244600 }, { "epoch": 4.294492529714356, "grad_norm": 0.04138233396747195, "learning_rate": 1.1956807075022533e-05, "loss": 0.8394, "step": 244610 }, { "epoch": 4.294668094594357, "grad_norm": 0.054821080133501496, "learning_rate": 1.1951937061757834e-05, "loss": 0.8335, "step": 244620 }, { "epoch": 4.294843659474359, "grad_norm": 0.05021169979814272, "learning_rate": 1.194706817674915e-05, "loss": 0.8436, "step": 244630 }, { "epoch": 4.29501922435436, "grad_norm": 0.04433458797325256, "learning_rate": 1.1942200420058183e-05, "loss": 0.8454, "step": 244640 }, { "epoch": 4.295194789234362, "grad_norm": 0.047970805217978175, "learning_rate": 1.193733379174662e-05, "loss": 0.8412, "step": 244650 }, { "epoch": 4.295370354114363, "grad_norm": 0.0624123001053848, "learning_rate": 1.1932468291876082e-05, "loss": 0.8378, "step": 244660 }, { "epoch": 4.295545918994365, "grad_norm": 0.13142262994387713, "learning_rate": 1.1927603920508269e-05, "loss": 0.8317, "step": 244670 }, { "epoch": 4.295721483874366, "grad_norm": 0.060142868420599, "learning_rate": 1.1922740677704747e-05, "loss": 0.8485, "step": 244680 }, { "epoch": 4.295897048754367, "grad_norm": 0.06098260564595067, "learning_rate": 1.1917878563527204e-05, "loss": 0.8337, "step": 244690 }, { "epoch": 4.296072613634369, "grad_norm": 0.0439301758572402, "learning_rate": 1.1913017578037192e-05, "loss": 0.8416, "step": 244700 }, { "epoch": 4.29624817851437, "grad_norm": 0.05150725746110822, "learning_rate": 1.1908157721296323e-05, "loss": 0.8365, "step": 244710 }, { "epoch": 4.296423743394372, "grad_norm": 0.05683223346221046, "learning_rate": 1.1903298993366186e-05, "loss": 0.8343, "step": 244720 }, { "epoch": 4.296599308274373, "grad_norm": 0.07657865853627895, "learning_rate": 1.1898441394308313e-05, "loss": 0.8272, "step": 244730 }, { "epoch": 4.296774873154374, "grad_norm": 0.05867583535768245, "learning_rate": 1.1893584924184263e-05, "loss": 0.842, "step": 244740 }, { "epoch": 4.296950438034376, "grad_norm": 0.05619390364604521, "learning_rate": 1.188872958305557e-05, "loss": 0.8356, "step": 244750 }, { "epoch": 4.297126002914377, "grad_norm": 0.053905479259493023, "learning_rate": 1.1883875370983766e-05, "loss": 0.8382, "step": 244760 }, { "epoch": 4.297301567794379, "grad_norm": 0.04140738071273563, "learning_rate": 1.1879022288030323e-05, "loss": 0.8421, "step": 244770 }, { "epoch": 4.29747713267438, "grad_norm": 0.054064051725794794, "learning_rate": 1.187417033425674e-05, "loss": 0.8337, "step": 244780 }, { "epoch": 4.297652697554382, "grad_norm": 0.06361684316238882, "learning_rate": 1.186931950972453e-05, "loss": 0.8399, "step": 244790 }, { "epoch": 4.297828262434383, "grad_norm": 0.054400890811550494, "learning_rate": 1.1864469814495101e-05, "loss": 0.8337, "step": 244800 }, { "epoch": 4.298003827314384, "grad_norm": 0.05954778250865471, "learning_rate": 1.1859621248629923e-05, "loss": 0.8342, "step": 244810 }, { "epoch": 4.298179392194386, "grad_norm": 0.05032897457704196, "learning_rate": 1.1854773812190442e-05, "loss": 0.8373, "step": 244820 }, { "epoch": 4.298354957074387, "grad_norm": 0.05139830820780887, "learning_rate": 1.1849927505238043e-05, "loss": 0.8426, "step": 244830 }, { "epoch": 4.298530521954389, "grad_norm": 0.049824423174182834, "learning_rate": 1.184508232783417e-05, "loss": 0.8479, "step": 244840 }, { "epoch": 4.29870608683439, "grad_norm": 0.03996273977131417, "learning_rate": 1.1840238280040152e-05, "loss": 0.8504, "step": 244850 }, { "epoch": 4.298881651714391, "grad_norm": 0.08172327167555041, "learning_rate": 1.1835395361917442e-05, "loss": 0.8376, "step": 244860 }, { "epoch": 4.2990572165943925, "grad_norm": 0.06594967981822622, "learning_rate": 1.1830553573527336e-05, "loss": 0.8478, "step": 244870 }, { "epoch": 4.299232781474394, "grad_norm": 0.05557580843759508, "learning_rate": 1.1825712914931214e-05, "loss": 0.8378, "step": 244880 }, { "epoch": 4.2994083463543955, "grad_norm": 0.054667818135308716, "learning_rate": 1.1820873386190424e-05, "loss": 0.8393, "step": 244890 }, { "epoch": 4.2995839112343965, "grad_norm": 0.057441578236777, "learning_rate": 1.1816034987366228e-05, "loss": 0.8326, "step": 244900 }, { "epoch": 4.2997594761143985, "grad_norm": 0.05380601850319926, "learning_rate": 1.1811197718519953e-05, "loss": 0.8404, "step": 244910 }, { "epoch": 4.2999350409943995, "grad_norm": 0.061545844638529454, "learning_rate": 1.1806361579712918e-05, "loss": 0.8383, "step": 244920 }, { "epoch": 4.3001106058744005, "grad_norm": 0.04584977686480838, "learning_rate": 1.1801526571006375e-05, "loss": 0.8343, "step": 244930 }, { "epoch": 4.3002861707544024, "grad_norm": 0.04900715950556405, "learning_rate": 1.1796692692461576e-05, "loss": 0.8275, "step": 244940 }, { "epoch": 4.3004617356344035, "grad_norm": 0.04004644341141746, "learning_rate": 1.179185994413978e-05, "loss": 0.8445, "step": 244950 }, { "epoch": 4.300637300514405, "grad_norm": 0.06069050558636753, "learning_rate": 1.1787028326102224e-05, "loss": 0.839, "step": 244960 }, { "epoch": 4.300812865394406, "grad_norm": 0.07190191801390593, "learning_rate": 1.17821978384101e-05, "loss": 0.84, "step": 244970 }, { "epoch": 4.300988430274408, "grad_norm": 0.04212830601291455, "learning_rate": 1.1777368481124629e-05, "loss": 0.8465, "step": 244980 }, { "epoch": 4.301163995154409, "grad_norm": 0.07155635379873759, "learning_rate": 1.1772540254307022e-05, "loss": 0.8379, "step": 244990 }, { "epoch": 4.30133956003441, "grad_norm": 0.043870891887915583, "learning_rate": 1.1767713158018406e-05, "loss": 0.8259, "step": 245000 }, { "epoch": 4.301515124914412, "grad_norm": 0.05058865515048825, "learning_rate": 1.1762887192319988e-05, "loss": 0.8429, "step": 245010 }, { "epoch": 4.301690689794413, "grad_norm": 0.05385236640474205, "learning_rate": 1.1758062357272854e-05, "loss": 0.8299, "step": 245020 }, { "epoch": 4.301866254674415, "grad_norm": 0.06154945049895782, "learning_rate": 1.1753238652938212e-05, "loss": 0.8399, "step": 245030 }, { "epoch": 4.302041819554416, "grad_norm": 0.048673559789500394, "learning_rate": 1.1748416079377119e-05, "loss": 0.8493, "step": 245040 }, { "epoch": 4.302217384434417, "grad_norm": 0.04804337326424673, "learning_rate": 1.1743594636650701e-05, "loss": 0.8344, "step": 245050 }, { "epoch": 4.302392949314419, "grad_norm": 0.06578787998663972, "learning_rate": 1.1738774324820061e-05, "loss": 0.8368, "step": 245060 }, { "epoch": 4.30256851419442, "grad_norm": 0.04538952960993355, "learning_rate": 1.1733955143946243e-05, "loss": 0.8338, "step": 245070 }, { "epoch": 4.302744079074422, "grad_norm": 0.04723023669017851, "learning_rate": 1.1729137094090309e-05, "loss": 0.8327, "step": 245080 }, { "epoch": 4.302919643954423, "grad_norm": 0.05020374107220653, "learning_rate": 1.1724320175313325e-05, "loss": 0.8375, "step": 245090 }, { "epoch": 4.303095208834424, "grad_norm": 0.052560736988129915, "learning_rate": 1.1719504387676336e-05, "loss": 0.8437, "step": 245100 }, { "epoch": 4.303270773714426, "grad_norm": 0.04936403507502994, "learning_rate": 1.1714689731240314e-05, "loss": 0.8385, "step": 245110 }, { "epoch": 4.303446338594427, "grad_norm": 0.05041446251194637, "learning_rate": 1.1709876206066293e-05, "loss": 0.8382, "step": 245120 }, { "epoch": 4.303621903474429, "grad_norm": 0.05553838402557932, "learning_rate": 1.1705063812215258e-05, "loss": 0.8364, "step": 245130 }, { "epoch": 4.30379746835443, "grad_norm": 0.04649232961564507, "learning_rate": 1.170025254974817e-05, "loss": 0.8496, "step": 245140 }, { "epoch": 4.303973033234432, "grad_norm": 0.049938812925953986, "learning_rate": 1.1695442418725985e-05, "loss": 0.8398, "step": 245150 }, { "epoch": 4.304148598114433, "grad_norm": 0.046526860298201834, "learning_rate": 1.1690633419209685e-05, "loss": 0.8302, "step": 245160 }, { "epoch": 4.304324162994434, "grad_norm": 0.04704129099535935, "learning_rate": 1.168582555126016e-05, "loss": 0.8436, "step": 245170 }, { "epoch": 4.304499727874436, "grad_norm": 0.05093876760221799, "learning_rate": 1.1681018814938352e-05, "loss": 0.839, "step": 245180 }, { "epoch": 4.304675292754437, "grad_norm": 0.0711765274780771, "learning_rate": 1.1676213210305125e-05, "loss": 0.8413, "step": 245190 }, { "epoch": 4.304850857634439, "grad_norm": 0.043965545015763156, "learning_rate": 1.1671408737421436e-05, "loss": 0.8306, "step": 245200 }, { "epoch": 4.30502642251444, "grad_norm": 0.050452756241773304, "learning_rate": 1.1666605396348085e-05, "loss": 0.8441, "step": 245210 }, { "epoch": 4.305201987394442, "grad_norm": 0.07116607183796357, "learning_rate": 1.1661803187145976e-05, "loss": 0.8433, "step": 245220 }, { "epoch": 4.305377552274443, "grad_norm": 0.05053123597153073, "learning_rate": 1.1657002109875953e-05, "loss": 0.8428, "step": 245230 }, { "epoch": 4.305553117154444, "grad_norm": 0.05060172310495273, "learning_rate": 1.1652202164598824e-05, "loss": 0.8362, "step": 245240 }, { "epoch": 4.305728682034446, "grad_norm": 0.05582020383401215, "learning_rate": 1.1647403351375416e-05, "loss": 0.8354, "step": 245250 }, { "epoch": 4.305904246914447, "grad_norm": 0.049964122501737485, "learning_rate": 1.1642605670266527e-05, "loss": 0.8337, "step": 245260 }, { "epoch": 4.306079811794449, "grad_norm": 0.05332377230219553, "learning_rate": 1.163780912133297e-05, "loss": 0.8387, "step": 245270 }, { "epoch": 4.30625537667445, "grad_norm": 0.11333517702182735, "learning_rate": 1.163301370463549e-05, "loss": 0.8337, "step": 245280 }, { "epoch": 4.306430941554451, "grad_norm": 0.05635449189900751, "learning_rate": 1.1628219420234843e-05, "loss": 0.8362, "step": 245290 }, { "epoch": 4.306606506434453, "grad_norm": 0.05146107119301413, "learning_rate": 1.1623426268191796e-05, "loss": 0.8462, "step": 245300 }, { "epoch": 4.306782071314454, "grad_norm": 0.06593791671290132, "learning_rate": 1.1618634248567057e-05, "loss": 0.8403, "step": 245310 }, { "epoch": 4.306957636194456, "grad_norm": 0.05097764203100669, "learning_rate": 1.1613843361421354e-05, "loss": 0.8308, "step": 245320 }, { "epoch": 4.307133201074457, "grad_norm": 0.04000073956058394, "learning_rate": 1.1609053606815406e-05, "loss": 0.8378, "step": 245330 }, { "epoch": 4.307308765954459, "grad_norm": 0.04542219497467321, "learning_rate": 1.1604264984809849e-05, "loss": 0.841, "step": 245340 }, { "epoch": 4.30748433083446, "grad_norm": 0.047297998394782076, "learning_rate": 1.1599477495465416e-05, "loss": 0.8413, "step": 245350 }, { "epoch": 4.307659895714461, "grad_norm": 0.043103117572115386, "learning_rate": 1.1594691138842703e-05, "loss": 0.8393, "step": 245360 }, { "epoch": 4.307835460594463, "grad_norm": 0.053223814130104, "learning_rate": 1.1589905915002424e-05, "loss": 0.8423, "step": 245370 }, { "epoch": 4.308011025474464, "grad_norm": 0.046327531610138084, "learning_rate": 1.1585121824005141e-05, "loss": 0.8449, "step": 245380 }, { "epoch": 4.308186590354466, "grad_norm": 0.043936658117397706, "learning_rate": 1.1580338865911516e-05, "loss": 0.8401, "step": 245390 }, { "epoch": 4.308362155234467, "grad_norm": 0.053265140330648116, "learning_rate": 1.1575557040782142e-05, "loss": 0.8399, "step": 245400 }, { "epoch": 4.308537720114468, "grad_norm": 0.04534178750566319, "learning_rate": 1.1570776348677593e-05, "loss": 0.8386, "step": 245410 }, { "epoch": 4.30871328499447, "grad_norm": 0.047724339597722605, "learning_rate": 1.1565996789658428e-05, "loss": 0.8437, "step": 245420 }, { "epoch": 4.308888849874471, "grad_norm": 0.0556544273913987, "learning_rate": 1.1561218363785243e-05, "loss": 0.8411, "step": 245430 }, { "epoch": 4.309064414754473, "grad_norm": 0.06107082268735241, "learning_rate": 1.1556441071118567e-05, "loss": 0.8303, "step": 245440 }, { "epoch": 4.309239979634474, "grad_norm": 0.05201781316174373, "learning_rate": 1.1551664911718907e-05, "loss": 0.8392, "step": 245450 }, { "epoch": 4.309415544514476, "grad_norm": 0.05802487428795362, "learning_rate": 1.15468898856468e-05, "loss": 0.8366, "step": 245460 }, { "epoch": 4.309591109394477, "grad_norm": 0.05443425954780417, "learning_rate": 1.1542115992962769e-05, "loss": 0.8357, "step": 245470 }, { "epoch": 4.309766674274478, "grad_norm": 0.048033804130247, "learning_rate": 1.1537343233727235e-05, "loss": 0.8346, "step": 245480 }, { "epoch": 4.30994223915448, "grad_norm": 0.0578134773629109, "learning_rate": 1.1532571608000725e-05, "loss": 0.8296, "step": 245490 }, { "epoch": 4.310117804034481, "grad_norm": 0.05347676335260744, "learning_rate": 1.1527801115843685e-05, "loss": 0.8386, "step": 245500 }, { "epoch": 4.310293368914483, "grad_norm": 0.05176060807033332, "learning_rate": 1.1523031757316543e-05, "loss": 0.8336, "step": 245510 }, { "epoch": 4.310468933794484, "grad_norm": 0.05100448724346923, "learning_rate": 1.1518263532479747e-05, "loss": 0.836, "step": 245520 }, { "epoch": 4.310644498674485, "grad_norm": 0.0546506104409918, "learning_rate": 1.1513496441393665e-05, "loss": 0.8407, "step": 245530 }, { "epoch": 4.310820063554487, "grad_norm": 0.04632124770484475, "learning_rate": 1.1508730484118773e-05, "loss": 0.8444, "step": 245540 }, { "epoch": 4.310995628434488, "grad_norm": 0.04812895952692338, "learning_rate": 1.1503965660715402e-05, "loss": 0.8388, "step": 245550 }, { "epoch": 4.31117119331449, "grad_norm": 0.05538867998470552, "learning_rate": 1.149920197124394e-05, "loss": 0.8369, "step": 245560 }, { "epoch": 4.311346758194491, "grad_norm": 0.06117540188164554, "learning_rate": 1.1494439415764743e-05, "loss": 0.8413, "step": 245570 }, { "epoch": 4.311522323074493, "grad_norm": 0.04374815998504964, "learning_rate": 1.148967799433815e-05, "loss": 0.832, "step": 245580 }, { "epoch": 4.311697887954494, "grad_norm": 0.04716904052808959, "learning_rate": 1.1484917707024487e-05, "loss": 0.8407, "step": 245590 }, { "epoch": 4.311873452834495, "grad_norm": 0.04631750071267555, "learning_rate": 1.1480158553884076e-05, "loss": 0.8392, "step": 245600 }, { "epoch": 4.312049017714497, "grad_norm": 0.07910281987274533, "learning_rate": 1.1475400534977226e-05, "loss": 0.8409, "step": 245610 }, { "epoch": 4.312224582594498, "grad_norm": 0.053538726836829954, "learning_rate": 1.147064365036419e-05, "loss": 0.8422, "step": 245620 }, { "epoch": 4.3124001474744995, "grad_norm": 0.03910904133532887, "learning_rate": 1.1465887900105254e-05, "loss": 0.8489, "step": 245630 }, { "epoch": 4.3125757123545005, "grad_norm": 0.04639232600582583, "learning_rate": 1.1461133284260706e-05, "loss": 0.8386, "step": 245640 }, { "epoch": 4.3127512772345025, "grad_norm": 0.04757575092060136, "learning_rate": 1.1456379802890732e-05, "loss": 0.838, "step": 245650 }, { "epoch": 4.3129268421145035, "grad_norm": 0.06307347335514871, "learning_rate": 1.1451627456055586e-05, "loss": 0.8407, "step": 245660 }, { "epoch": 4.3131024069945045, "grad_norm": 0.05123451234815867, "learning_rate": 1.1446876243815505e-05, "loss": 0.8363, "step": 245670 }, { "epoch": 4.3132779718745065, "grad_norm": 0.047281794257342906, "learning_rate": 1.1442126166230646e-05, "loss": 0.8321, "step": 245680 }, { "epoch": 4.3134535367545075, "grad_norm": 0.04662344887414292, "learning_rate": 1.1437377223361227e-05, "loss": 0.8388, "step": 245690 }, { "epoch": 4.313629101634509, "grad_norm": 0.06220551376782061, "learning_rate": 1.1432629415267376e-05, "loss": 0.8395, "step": 245700 }, { "epoch": 4.3138046665145104, "grad_norm": 0.0473934379307805, "learning_rate": 1.1427882742009305e-05, "loss": 0.8399, "step": 245710 }, { "epoch": 4.3139802313945115, "grad_norm": 0.05445678040843425, "learning_rate": 1.1423137203647102e-05, "loss": 0.8422, "step": 245720 }, { "epoch": 4.314155796274513, "grad_norm": 0.05158136010326667, "learning_rate": 1.1418392800240935e-05, "loss": 0.8308, "step": 245730 }, { "epoch": 4.314331361154514, "grad_norm": 0.08153596834331182, "learning_rate": 1.1413649531850898e-05, "loss": 0.8432, "step": 245740 }, { "epoch": 4.314506926034516, "grad_norm": 0.05755127471958991, "learning_rate": 1.1408907398537087e-05, "loss": 0.8378, "step": 245750 }, { "epoch": 4.314682490914517, "grad_norm": 0.05719940036906305, "learning_rate": 1.1404166400359586e-05, "loss": 0.8424, "step": 245760 }, { "epoch": 4.314858055794518, "grad_norm": 0.04789681212576469, "learning_rate": 1.1399426537378467e-05, "loss": 0.8363, "step": 245770 }, { "epoch": 4.31503362067452, "grad_norm": 0.04922862543755677, "learning_rate": 1.1394687809653804e-05, "loss": 0.8346, "step": 245780 }, { "epoch": 4.315209185554521, "grad_norm": 0.08075315158298212, "learning_rate": 1.1389950217245595e-05, "loss": 0.8464, "step": 245790 }, { "epoch": 4.315384750434523, "grad_norm": 0.06149364827369889, "learning_rate": 1.1385213760213893e-05, "loss": 0.8393, "step": 245800 }, { "epoch": 4.315560315314524, "grad_norm": 0.07473195562622234, "learning_rate": 1.1380478438618728e-05, "loss": 0.8278, "step": 245810 }, { "epoch": 4.315735880194526, "grad_norm": 0.06559473606312984, "learning_rate": 1.1375744252520055e-05, "loss": 0.8362, "step": 245820 }, { "epoch": 4.315911445074527, "grad_norm": 0.05335887612945319, "learning_rate": 1.1371011201977888e-05, "loss": 0.8401, "step": 245830 }, { "epoch": 4.316087009954528, "grad_norm": 0.0490505162542878, "learning_rate": 1.1366279287052196e-05, "loss": 0.834, "step": 245840 }, { "epoch": 4.31626257483453, "grad_norm": 0.04320766174986809, "learning_rate": 1.136154850780291e-05, "loss": 0.8445, "step": 245850 }, { "epoch": 4.316438139714531, "grad_norm": 0.1456196749008855, "learning_rate": 1.1356818864290004e-05, "loss": 0.8419, "step": 245860 }, { "epoch": 4.316613704594533, "grad_norm": 0.05239227394381047, "learning_rate": 1.135209035657333e-05, "loss": 0.8377, "step": 245870 }, { "epoch": 4.316789269474534, "grad_norm": 0.04647257805340461, "learning_rate": 1.1347362984712907e-05, "loss": 0.8395, "step": 245880 }, { "epoch": 4.316964834354536, "grad_norm": 0.06044983363456469, "learning_rate": 1.1342636748768554e-05, "loss": 0.8381, "step": 245890 }, { "epoch": 4.317140399234537, "grad_norm": 0.05945416853515307, "learning_rate": 1.1337911648800174e-05, "loss": 0.8322, "step": 245900 }, { "epoch": 4.317315964114538, "grad_norm": 0.051621776609643365, "learning_rate": 1.1333187684867649e-05, "loss": 0.8431, "step": 245910 }, { "epoch": 4.31749152899454, "grad_norm": 0.05282641626439428, "learning_rate": 1.1328464857030816e-05, "loss": 0.8378, "step": 245920 }, { "epoch": 4.317667093874541, "grad_norm": 0.0456798777507656, "learning_rate": 1.1323743165349531e-05, "loss": 0.8324, "step": 245930 }, { "epoch": 4.317842658754543, "grad_norm": 0.05254010839732587, "learning_rate": 1.1319022609883564e-05, "loss": 0.8477, "step": 245940 }, { "epoch": 4.318018223634544, "grad_norm": 0.04465380912773114, "learning_rate": 1.131430319069281e-05, "loss": 0.8439, "step": 245950 }, { "epoch": 4.318193788514545, "grad_norm": 0.051579788709676125, "learning_rate": 1.1309584907837001e-05, "loss": 0.8355, "step": 245960 }, { "epoch": 4.318369353394547, "grad_norm": 0.05601585830296319, "learning_rate": 1.130486776137594e-05, "loss": 0.8326, "step": 245970 }, { "epoch": 4.318544918274548, "grad_norm": 0.044449206320163526, "learning_rate": 1.1300151751369426e-05, "loss": 0.8373, "step": 245980 }, { "epoch": 4.31872048315455, "grad_norm": 0.04664832950030553, "learning_rate": 1.1295436877877144e-05, "loss": 0.8456, "step": 245990 }, { "epoch": 4.318896048034551, "grad_norm": 0.057237619979073326, "learning_rate": 1.1290723140958881e-05, "loss": 0.8392, "step": 246000 }, { "epoch": 4.319071612914552, "grad_norm": 0.05466263174698412, "learning_rate": 1.128601054067436e-05, "loss": 0.8429, "step": 246010 }, { "epoch": 4.319247177794554, "grad_norm": 0.05104641756973391, "learning_rate": 1.1281299077083265e-05, "loss": 0.8367, "step": 246020 }, { "epoch": 4.319422742674555, "grad_norm": 0.04545992288778748, "learning_rate": 1.127658875024532e-05, "loss": 0.8422, "step": 246030 }, { "epoch": 4.319598307554557, "grad_norm": 0.052182944405072866, "learning_rate": 1.1271879560220155e-05, "loss": 0.8392, "step": 246040 }, { "epoch": 4.319773872434558, "grad_norm": 0.052450246182211155, "learning_rate": 1.1267171507067509e-05, "loss": 0.8456, "step": 246050 }, { "epoch": 4.31994943731456, "grad_norm": 0.050292689467526595, "learning_rate": 1.126246459084698e-05, "loss": 0.8351, "step": 246060 }, { "epoch": 4.320125002194561, "grad_norm": 0.05640001242792479, "learning_rate": 1.1257758811618225e-05, "loss": 0.8396, "step": 246070 }, { "epoch": 4.320300567074562, "grad_norm": 0.06082219362775022, "learning_rate": 1.1253054169440877e-05, "loss": 0.8373, "step": 246080 }, { "epoch": 4.320476131954564, "grad_norm": 0.08200972198943803, "learning_rate": 1.1248350664374521e-05, "loss": 0.8395, "step": 246090 }, { "epoch": 4.320651696834565, "grad_norm": 0.053436975112367543, "learning_rate": 1.1243648296478775e-05, "loss": 0.8392, "step": 246100 }, { "epoch": 4.320827261714567, "grad_norm": 0.040252176739707476, "learning_rate": 1.1238947065813176e-05, "loss": 0.8336, "step": 246110 }, { "epoch": 4.321002826594568, "grad_norm": 0.05574740923586585, "learning_rate": 1.1234246972437353e-05, "loss": 0.8324, "step": 246120 }, { "epoch": 4.32117839147457, "grad_norm": 0.06164237906156451, "learning_rate": 1.1229548016410812e-05, "loss": 0.8422, "step": 246130 }, { "epoch": 4.321353956354571, "grad_norm": 0.04994522710747393, "learning_rate": 1.1224850197793094e-05, "loss": 0.8339, "step": 246140 }, { "epoch": 4.321529521234572, "grad_norm": 0.0541850197846064, "learning_rate": 1.1220153516643756e-05, "loss": 0.844, "step": 246150 }, { "epoch": 4.321705086114574, "grad_norm": 0.05875738818884981, "learning_rate": 1.1215457973022253e-05, "loss": 0.8354, "step": 246160 }, { "epoch": 4.321880650994575, "grad_norm": 0.04629261844241304, "learning_rate": 1.12107635669881e-05, "loss": 0.8486, "step": 246170 }, { "epoch": 4.322056215874577, "grad_norm": 0.07028599515154274, "learning_rate": 1.1206070298600815e-05, "loss": 0.8371, "step": 246180 }, { "epoch": 4.322231780754578, "grad_norm": 0.05976276228139678, "learning_rate": 1.1201378167919798e-05, "loss": 0.8387, "step": 246190 }, { "epoch": 4.322407345634579, "grad_norm": 0.063456416936515, "learning_rate": 1.119668717500455e-05, "loss": 0.839, "step": 246200 }, { "epoch": 4.322582910514581, "grad_norm": 0.04257637850094369, "learning_rate": 1.1191997319914452e-05, "loss": 0.8407, "step": 246210 }, { "epoch": 4.322758475394582, "grad_norm": 0.0462726173011611, "learning_rate": 1.118730860270899e-05, "loss": 0.8377, "step": 246220 }, { "epoch": 4.322934040274584, "grad_norm": 0.04312337420808283, "learning_rate": 1.1182621023447526e-05, "loss": 0.846, "step": 246230 }, { "epoch": 4.323109605154585, "grad_norm": 0.043391924320128526, "learning_rate": 1.1177934582189473e-05, "loss": 0.8363, "step": 246240 }, { "epoch": 4.323285170034587, "grad_norm": 0.0572035603327061, "learning_rate": 1.1173249278994219e-05, "loss": 0.8389, "step": 246250 }, { "epoch": 4.323460734914588, "grad_norm": 0.04374176197534241, "learning_rate": 1.1168565113921092e-05, "loss": 0.8446, "step": 246260 }, { "epoch": 4.323636299794589, "grad_norm": 0.04344135765151653, "learning_rate": 1.116388208702947e-05, "loss": 0.8346, "step": 246270 }, { "epoch": 4.323811864674591, "grad_norm": 0.04213466003144027, "learning_rate": 1.1159200198378658e-05, "loss": 0.8435, "step": 246280 }, { "epoch": 4.323987429554592, "grad_norm": 0.0526740748439592, "learning_rate": 1.1154519448028023e-05, "loss": 0.8343, "step": 246290 }, { "epoch": 4.324162994434594, "grad_norm": 0.04575707032887137, "learning_rate": 1.1149839836036838e-05, "loss": 0.84, "step": 246300 }, { "epoch": 4.324338559314595, "grad_norm": 0.06610153288354238, "learning_rate": 1.1145161362464388e-05, "loss": 0.832, "step": 246310 }, { "epoch": 4.324514124194597, "grad_norm": 0.07356751373780998, "learning_rate": 1.1140484027369985e-05, "loss": 0.8353, "step": 246320 }, { "epoch": 4.324689689074598, "grad_norm": 0.053096331138369895, "learning_rate": 1.1135807830812861e-05, "loss": 0.8348, "step": 246330 }, { "epoch": 4.324865253954599, "grad_norm": 0.039503922097329155, "learning_rate": 1.1131132772852268e-05, "loss": 0.8451, "step": 246340 }, { "epoch": 4.325040818834601, "grad_norm": 0.04861484543432536, "learning_rate": 1.112645885354747e-05, "loss": 0.8368, "step": 246350 }, { "epoch": 4.325216383714602, "grad_norm": 0.07231507751235747, "learning_rate": 1.1121786072957644e-05, "loss": 0.8419, "step": 246360 }, { "epoch": 4.3253919485946035, "grad_norm": 0.06261723763640438, "learning_rate": 1.111711443114203e-05, "loss": 0.8429, "step": 246370 }, { "epoch": 4.3255675134746046, "grad_norm": 0.055858348460470615, "learning_rate": 1.1112443928159757e-05, "loss": 0.8316, "step": 246380 }, { "epoch": 4.325743078354606, "grad_norm": 0.05723107836725781, "learning_rate": 1.1107774564070095e-05, "loss": 0.8468, "step": 246390 }, { "epoch": 4.3259186432346075, "grad_norm": 0.046026867140160604, "learning_rate": 1.1103106338932135e-05, "loss": 0.842, "step": 246400 }, { "epoch": 4.3260942081146085, "grad_norm": 0.05988624665884387, "learning_rate": 1.1098439252805037e-05, "loss": 0.8336, "step": 246410 }, { "epoch": 4.3262697729946105, "grad_norm": 0.05938465471094208, "learning_rate": 1.1093773305747975e-05, "loss": 0.8436, "step": 246420 }, { "epoch": 4.3264453378746115, "grad_norm": 0.127978270634578, "learning_rate": 1.1089108497820014e-05, "loss": 0.8359, "step": 246430 }, { "epoch": 4.3266209027546125, "grad_norm": 0.04207254928046134, "learning_rate": 1.1084444829080293e-05, "loss": 0.8352, "step": 246440 }, { "epoch": 4.3267964676346145, "grad_norm": 0.06912948848247473, "learning_rate": 1.107978229958786e-05, "loss": 0.8413, "step": 246450 }, { "epoch": 4.3269720325146155, "grad_norm": 0.04369068777483732, "learning_rate": 1.1075120909401845e-05, "loss": 0.8374, "step": 246460 }, { "epoch": 4.327147597394617, "grad_norm": 0.09760934002587227, "learning_rate": 1.1070460658581267e-05, "loss": 0.8462, "step": 246470 }, { "epoch": 4.327323162274618, "grad_norm": 0.05518121994044258, "learning_rate": 1.1065801547185194e-05, "loss": 0.8424, "step": 246480 }, { "epoch": 4.32749872715462, "grad_norm": 0.05719942409781246, "learning_rate": 1.1061143575272656e-05, "loss": 0.8422, "step": 246490 }, { "epoch": 4.327674292034621, "grad_norm": 0.06183008418350007, "learning_rate": 1.105648674290266e-05, "loss": 0.8304, "step": 246500 }, { "epoch": 4.327849856914622, "grad_norm": 0.06627478174728471, "learning_rate": 1.1051831050134213e-05, "loss": 0.8343, "step": 246510 }, { "epoch": 4.328025421794624, "grad_norm": 0.0547292771935589, "learning_rate": 1.104717649702632e-05, "loss": 0.832, "step": 246520 }, { "epoch": 4.328200986674625, "grad_norm": 0.05503781333067508, "learning_rate": 1.1042523083637921e-05, "loss": 0.8435, "step": 246530 }, { "epoch": 4.328376551554627, "grad_norm": 0.0778686073287856, "learning_rate": 1.1037870810028012e-05, "loss": 0.8331, "step": 246540 }, { "epoch": 4.328552116434628, "grad_norm": 0.053326634554785035, "learning_rate": 1.1033219676255487e-05, "loss": 0.834, "step": 246550 }, { "epoch": 4.32872768131463, "grad_norm": 0.057467174570761434, "learning_rate": 1.1028569682379348e-05, "loss": 0.8405, "step": 246560 }, { "epoch": 4.328903246194631, "grad_norm": 0.047283480772804656, "learning_rate": 1.1023920828458451e-05, "loss": 0.8295, "step": 246570 }, { "epoch": 4.329078811074632, "grad_norm": 0.05564013596966547, "learning_rate": 1.1019273114551729e-05, "loss": 0.839, "step": 246580 }, { "epoch": 4.329254375954634, "grad_norm": 0.050810796512307715, "learning_rate": 1.1014626540718075e-05, "loss": 0.8432, "step": 246590 }, { "epoch": 4.329429940834635, "grad_norm": 0.05529550316422678, "learning_rate": 1.1009981107016326e-05, "loss": 0.8436, "step": 246600 }, { "epoch": 4.329605505714637, "grad_norm": 0.04280166827208792, "learning_rate": 1.1005336813505396e-05, "loss": 0.8425, "step": 246610 }, { "epoch": 4.329781070594638, "grad_norm": 0.0478666075915732, "learning_rate": 1.100069366024405e-05, "loss": 0.841, "step": 246620 }, { "epoch": 4.329956635474639, "grad_norm": 0.0540065439158734, "learning_rate": 1.0996051647291191e-05, "loss": 0.8323, "step": 246630 }, { "epoch": 4.330132200354641, "grad_norm": 0.04794469113166784, "learning_rate": 1.0991410774705594e-05, "loss": 0.8437, "step": 246640 }, { "epoch": 4.330307765234642, "grad_norm": 0.05790242885633262, "learning_rate": 1.0986771042546079e-05, "loss": 0.8312, "step": 246650 }, { "epoch": 4.330483330114644, "grad_norm": 0.0481492951252245, "learning_rate": 1.098213245087144e-05, "loss": 0.8376, "step": 246660 }, { "epoch": 4.330658894994645, "grad_norm": 0.06837519826671988, "learning_rate": 1.0977494999740426e-05, "loss": 0.8366, "step": 246670 }, { "epoch": 4.330834459874646, "grad_norm": 0.05911164997572352, "learning_rate": 1.0972858689211793e-05, "loss": 0.835, "step": 246680 }, { "epoch": 4.331010024754648, "grad_norm": 0.03990117410925253, "learning_rate": 1.0968223519344312e-05, "loss": 0.8405, "step": 246690 }, { "epoch": 4.331185589634649, "grad_norm": 0.05356470024666078, "learning_rate": 1.0963589490196677e-05, "loss": 0.8375, "step": 246700 }, { "epoch": 4.331361154514651, "grad_norm": 0.05043129020550859, "learning_rate": 1.0958956601827635e-05, "loss": 0.8379, "step": 246710 }, { "epoch": 4.331536719394652, "grad_norm": 0.06245407765999437, "learning_rate": 1.0954324854295836e-05, "loss": 0.8369, "step": 246720 }, { "epoch": 4.331712284274654, "grad_norm": 0.05786650095282733, "learning_rate": 1.0949694247660028e-05, "loss": 0.8421, "step": 246730 }, { "epoch": 4.331887849154655, "grad_norm": 0.04402620309703318, "learning_rate": 1.094506478197884e-05, "loss": 0.8417, "step": 246740 }, { "epoch": 4.332063414034656, "grad_norm": 0.06647996766992324, "learning_rate": 1.0940436457310938e-05, "loss": 0.847, "step": 246750 }, { "epoch": 4.332238978914658, "grad_norm": 0.048918721126191965, "learning_rate": 1.0935809273714986e-05, "loss": 0.8377, "step": 246760 }, { "epoch": 4.332414543794659, "grad_norm": 0.04258599759705257, "learning_rate": 1.0931183231249576e-05, "loss": 0.8467, "step": 246770 }, { "epoch": 4.332590108674661, "grad_norm": 0.05183150207789099, "learning_rate": 1.0926558329973346e-05, "loss": 0.8375, "step": 246780 }, { "epoch": 4.332765673554662, "grad_norm": 0.06186298077224962, "learning_rate": 1.092193456994485e-05, "loss": 0.8407, "step": 246790 }, { "epoch": 4.332941238434664, "grad_norm": 0.0491263429306468, "learning_rate": 1.0917311951222745e-05, "loss": 0.8334, "step": 246800 }, { "epoch": 4.333116803314665, "grad_norm": 0.06189438258472078, "learning_rate": 1.0912690473865548e-05, "loss": 0.8357, "step": 246810 }, { "epoch": 4.333292368194666, "grad_norm": 0.04657725806537658, "learning_rate": 1.0908070137931822e-05, "loss": 0.8399, "step": 246820 }, { "epoch": 4.333467933074668, "grad_norm": 0.06420233514350097, "learning_rate": 1.0903450943480137e-05, "loss": 0.8424, "step": 246830 }, { "epoch": 4.333643497954669, "grad_norm": 0.05007879164212958, "learning_rate": 1.0898832890568966e-05, "loss": 0.8289, "step": 246840 }, { "epoch": 4.333819062834671, "grad_norm": 0.05611724632921337, "learning_rate": 1.0894215979256861e-05, "loss": 0.8412, "step": 246850 }, { "epoch": 4.333994627714672, "grad_norm": 0.06347556228322218, "learning_rate": 1.0889600209602316e-05, "loss": 0.8465, "step": 246860 }, { "epoch": 4.334170192594673, "grad_norm": 0.050112205303067994, "learning_rate": 1.0884985581663795e-05, "loss": 0.84, "step": 246870 }, { "epoch": 4.334345757474675, "grad_norm": 0.07002613979632667, "learning_rate": 1.0880372095499802e-05, "loss": 0.8399, "step": 246880 }, { "epoch": 4.334521322354676, "grad_norm": 0.04754482543186891, "learning_rate": 1.0875759751168724e-05, "loss": 0.839, "step": 246890 }, { "epoch": 4.334696887234678, "grad_norm": 0.05362356482925689, "learning_rate": 1.0871148548729076e-05, "loss": 0.8318, "step": 246900 }, { "epoch": 4.334872452114679, "grad_norm": 0.04842190130608221, "learning_rate": 1.0866538488239226e-05, "loss": 0.8426, "step": 246910 }, { "epoch": 4.335048016994681, "grad_norm": 0.054089228168250646, "learning_rate": 1.0861929569757618e-05, "loss": 0.8339, "step": 246920 }, { "epoch": 4.335223581874682, "grad_norm": 0.04833851451945741, "learning_rate": 1.0857321793342654e-05, "loss": 0.8413, "step": 246930 }, { "epoch": 4.335399146754683, "grad_norm": 0.051076096585777934, "learning_rate": 1.0852715159052673e-05, "loss": 0.839, "step": 246940 }, { "epoch": 4.335574711634685, "grad_norm": 0.04828870907572849, "learning_rate": 1.0848109666946085e-05, "loss": 0.8381, "step": 246950 }, { "epoch": 4.335750276514686, "grad_norm": 0.04701197133690719, "learning_rate": 1.0843505317081192e-05, "loss": 0.8449, "step": 246960 }, { "epoch": 4.335925841394688, "grad_norm": 0.04686151001621272, "learning_rate": 1.0838902109516411e-05, "loss": 0.84, "step": 246970 }, { "epoch": 4.336101406274689, "grad_norm": 0.04898871907550736, "learning_rate": 1.0834300044309997e-05, "loss": 0.8443, "step": 246980 }, { "epoch": 4.336276971154691, "grad_norm": 0.048046759794959336, "learning_rate": 1.0829699121520273e-05, "loss": 0.8378, "step": 246990 }, { "epoch": 4.336452536034692, "grad_norm": 0.05444105085838679, "learning_rate": 1.082509934120556e-05, "loss": 0.8434, "step": 247000 }, { "epoch": 4.336628100914693, "grad_norm": 0.05684948773481905, "learning_rate": 1.082050070342411e-05, "loss": 0.8389, "step": 247010 }, { "epoch": 4.336803665794695, "grad_norm": 0.05843160146340864, "learning_rate": 1.08159032082342e-05, "loss": 0.8463, "step": 247020 }, { "epoch": 4.336979230674696, "grad_norm": 0.05712792669522365, "learning_rate": 1.0811306855694089e-05, "loss": 0.8401, "step": 247030 }, { "epoch": 4.337154795554698, "grad_norm": 0.07449278470442873, "learning_rate": 1.0806711645861992e-05, "loss": 0.8396, "step": 247040 }, { "epoch": 4.337330360434699, "grad_norm": 0.062063274822815304, "learning_rate": 1.0802117578796162e-05, "loss": 0.8361, "step": 247050 }, { "epoch": 4.3375059253147, "grad_norm": 0.05279793972870181, "learning_rate": 1.0797524654554761e-05, "loss": 0.8386, "step": 247060 }, { "epoch": 4.337681490194702, "grad_norm": 0.04763312099154702, "learning_rate": 1.0792932873196035e-05, "loss": 0.8389, "step": 247070 }, { "epoch": 4.337857055074703, "grad_norm": 0.05519361864753907, "learning_rate": 1.0788342234778125e-05, "loss": 0.851, "step": 247080 }, { "epoch": 4.338032619954705, "grad_norm": 0.09092976029530582, "learning_rate": 1.0783752739359198e-05, "loss": 0.847, "step": 247090 }, { "epoch": 4.338208184834706, "grad_norm": 0.05110667296969216, "learning_rate": 1.077916438699744e-05, "loss": 0.8387, "step": 247100 }, { "epoch": 4.338383749714707, "grad_norm": 0.04407679898662703, "learning_rate": 1.0774577177750942e-05, "loss": 0.8391, "step": 247110 }, { "epoch": 4.338559314594709, "grad_norm": 0.04564855410729714, "learning_rate": 1.0769991111677854e-05, "loss": 0.8424, "step": 247120 }, { "epoch": 4.33873487947471, "grad_norm": 0.0659091102575813, "learning_rate": 1.076540618883625e-05, "loss": 0.8394, "step": 247130 }, { "epoch": 4.3389104443547115, "grad_norm": 0.048936976824179815, "learning_rate": 1.0760822409284266e-05, "loss": 0.8333, "step": 247140 }, { "epoch": 4.3390860092347125, "grad_norm": 0.06167454242062714, "learning_rate": 1.0756239773079941e-05, "loss": 0.8402, "step": 247150 }, { "epoch": 4.3392615741147145, "grad_norm": 0.04950138699663164, "learning_rate": 1.0751658280281357e-05, "loss": 0.8355, "step": 247160 }, { "epoch": 4.3394371389947155, "grad_norm": 0.059812072913810896, "learning_rate": 1.0747077930946565e-05, "loss": 0.8434, "step": 247170 }, { "epoch": 4.3396127038747165, "grad_norm": 0.049532160188277656, "learning_rate": 1.0742498725133585e-05, "loss": 0.8453, "step": 247180 }, { "epoch": 4.3397882687547185, "grad_norm": 0.0469355692947796, "learning_rate": 1.0737920662900442e-05, "loss": 0.8475, "step": 247190 }, { "epoch": 4.3399638336347195, "grad_norm": 0.045484326001113955, "learning_rate": 1.0733343744305163e-05, "loss": 0.84, "step": 247200 }, { "epoch": 4.340139398514721, "grad_norm": 0.053639670514234954, "learning_rate": 1.07287679694057e-05, "loss": 0.8335, "step": 247210 }, { "epoch": 4.3403149633947224, "grad_norm": 0.056180447572302304, "learning_rate": 1.0724193338260064e-05, "loss": 0.8501, "step": 247220 }, { "epoch": 4.340490528274724, "grad_norm": 0.06230455596011744, "learning_rate": 1.0719619850926167e-05, "loss": 0.841, "step": 247230 }, { "epoch": 4.340666093154725, "grad_norm": 0.04273900688371017, "learning_rate": 1.0715047507462018e-05, "loss": 0.8349, "step": 247240 }, { "epoch": 4.340841658034726, "grad_norm": 0.056924041198447586, "learning_rate": 1.0710476307925499e-05, "loss": 0.835, "step": 247250 }, { "epoch": 4.341017222914728, "grad_norm": 0.05107736751474325, "learning_rate": 1.0705906252374554e-05, "loss": 0.8399, "step": 247260 }, { "epoch": 4.341192787794729, "grad_norm": 0.05663667603502806, "learning_rate": 1.070133734086711e-05, "loss": 0.8413, "step": 247270 }, { "epoch": 4.341368352674731, "grad_norm": 0.0527925204318732, "learning_rate": 1.0696769573461e-05, "loss": 0.8406, "step": 247280 }, { "epoch": 4.341543917554732, "grad_norm": 0.05407713319039168, "learning_rate": 1.0692202950214136e-05, "loss": 0.8359, "step": 247290 }, { "epoch": 4.341719482434733, "grad_norm": 0.05628223671673813, "learning_rate": 1.0687637471184344e-05, "loss": 0.8385, "step": 247300 }, { "epoch": 4.341895047314735, "grad_norm": 0.05281536307407832, "learning_rate": 1.0683073136429527e-05, "loss": 0.8332, "step": 247310 }, { "epoch": 4.342070612194736, "grad_norm": 0.04757572461229528, "learning_rate": 1.0678509946007467e-05, "loss": 0.8348, "step": 247320 }, { "epoch": 4.342246177074738, "grad_norm": 0.04893681033145574, "learning_rate": 1.0673947899975984e-05, "loss": 0.84, "step": 247330 }, { "epoch": 4.342421741954739, "grad_norm": 0.05286417861486481, "learning_rate": 1.0669386998392925e-05, "loss": 0.8443, "step": 247340 }, { "epoch": 4.34259730683474, "grad_norm": 0.051704083329076, "learning_rate": 1.066482724131602e-05, "loss": 0.8461, "step": 247350 }, { "epoch": 4.342772871714742, "grad_norm": 0.04654090643555636, "learning_rate": 1.0660268628803058e-05, "loss": 0.8349, "step": 247360 }, { "epoch": 4.342948436594743, "grad_norm": 0.052469854772400924, "learning_rate": 1.0655711160911832e-05, "loss": 0.8385, "step": 247370 }, { "epoch": 4.343124001474745, "grad_norm": 0.06969684141723473, "learning_rate": 1.0651154837700045e-05, "loss": 0.8331, "step": 247380 }, { "epoch": 4.343299566354746, "grad_norm": 0.07220358730270393, "learning_rate": 1.0646599659225456e-05, "loss": 0.8339, "step": 247390 }, { "epoch": 4.343475131234748, "grad_norm": 0.047696123842767, "learning_rate": 1.0642045625545735e-05, "loss": 0.8313, "step": 247400 }, { "epoch": 4.343650696114749, "grad_norm": 0.0538883079162919, "learning_rate": 1.0637492736718643e-05, "loss": 0.8397, "step": 247410 }, { "epoch": 4.34382626099475, "grad_norm": 0.05170058592431217, "learning_rate": 1.0632940992801817e-05, "loss": 0.836, "step": 247420 }, { "epoch": 4.344001825874752, "grad_norm": 0.05022168130150661, "learning_rate": 1.062839039385295e-05, "loss": 0.8358, "step": 247430 }, { "epoch": 4.344177390754753, "grad_norm": 0.04672470034711838, "learning_rate": 1.0623840939929711e-05, "loss": 0.834, "step": 247440 }, { "epoch": 4.344352955634755, "grad_norm": 0.06644501616367558, "learning_rate": 1.0619292631089717e-05, "loss": 0.834, "step": 247450 }, { "epoch": 4.344528520514756, "grad_norm": 0.04492369377434546, "learning_rate": 1.0614745467390615e-05, "loss": 0.8472, "step": 247460 }, { "epoch": 4.344704085394758, "grad_norm": 0.06677609000791049, "learning_rate": 1.0610199448889982e-05, "loss": 0.8422, "step": 247470 }, { "epoch": 4.344879650274759, "grad_norm": 0.05835753118693348, "learning_rate": 1.0605654575645485e-05, "loss": 0.8431, "step": 247480 }, { "epoch": 4.34505521515476, "grad_norm": 0.04580593013546882, "learning_rate": 1.0601110847714643e-05, "loss": 0.8392, "step": 247490 }, { "epoch": 4.345230780034762, "grad_norm": 0.05270133606018113, "learning_rate": 1.0596568265155048e-05, "loss": 0.8335, "step": 247500 }, { "epoch": 4.345406344914763, "grad_norm": 0.05158037393316766, "learning_rate": 1.0592026828024285e-05, "loss": 0.8387, "step": 247510 }, { "epoch": 4.345581909794765, "grad_norm": 0.07770085544286182, "learning_rate": 1.0587486536379846e-05, "loss": 0.844, "step": 247520 }, { "epoch": 4.345757474674766, "grad_norm": 0.054507614878273304, "learning_rate": 1.0582947390279282e-05, "loss": 0.8348, "step": 247530 }, { "epoch": 4.345933039554767, "grad_norm": 0.060450586292299394, "learning_rate": 1.0578409389780113e-05, "loss": 0.8375, "step": 247540 }, { "epoch": 4.346108604434769, "grad_norm": 0.05366989459582905, "learning_rate": 1.0573872534939821e-05, "loss": 0.8404, "step": 247550 }, { "epoch": 4.34628416931477, "grad_norm": 0.057234028573235625, "learning_rate": 1.056933682581591e-05, "loss": 0.8406, "step": 247560 }, { "epoch": 4.346459734194772, "grad_norm": 0.055334799915985104, "learning_rate": 1.056480226246579e-05, "loss": 0.8432, "step": 247570 }, { "epoch": 4.346635299074773, "grad_norm": 0.06986775410998694, "learning_rate": 1.0560268844946997e-05, "loss": 0.8429, "step": 247580 }, { "epoch": 4.346810863954775, "grad_norm": 0.06073338657550239, "learning_rate": 1.055573657331691e-05, "loss": 0.8433, "step": 247590 }, { "epoch": 4.346986428834776, "grad_norm": 0.05578398808854327, "learning_rate": 1.0551205447632976e-05, "loss": 0.8399, "step": 247600 }, { "epoch": 4.347161993714777, "grad_norm": 0.04817940906201229, "learning_rate": 1.0546675467952622e-05, "loss": 0.837, "step": 247610 }, { "epoch": 4.347337558594779, "grad_norm": 0.05841089064428632, "learning_rate": 1.0542146634333206e-05, "loss": 0.8329, "step": 247620 }, { "epoch": 4.34751312347478, "grad_norm": 0.05551715172882545, "learning_rate": 1.0537618946832143e-05, "loss": 0.8306, "step": 247630 }, { "epoch": 4.347688688354782, "grad_norm": 0.05902304870166148, "learning_rate": 1.053309240550675e-05, "loss": 0.839, "step": 247640 }, { "epoch": 4.347864253234783, "grad_norm": 0.06171582198645077, "learning_rate": 1.052856701041445e-05, "loss": 0.8343, "step": 247650 }, { "epoch": 4.348039818114784, "grad_norm": 0.054104103373196016, "learning_rate": 1.0524042761612537e-05, "loss": 0.8474, "step": 247660 }, { "epoch": 4.348215382994786, "grad_norm": 0.06415949368301029, "learning_rate": 1.0519519659158333e-05, "loss": 0.8415, "step": 247670 }, { "epoch": 4.348390947874787, "grad_norm": 0.06397336789263386, "learning_rate": 1.0514997703109175e-05, "loss": 0.8406, "step": 247680 }, { "epoch": 4.348566512754789, "grad_norm": 0.04352273759347458, "learning_rate": 1.0510476893522316e-05, "loss": 0.8348, "step": 247690 }, { "epoch": 4.34874207763479, "grad_norm": 0.23529036128416236, "learning_rate": 1.0505957230455057e-05, "loss": 0.8396, "step": 247700 }, { "epoch": 4.348917642514792, "grad_norm": 0.04672847735605027, "learning_rate": 1.0501438713964696e-05, "loss": 0.84, "step": 247710 }, { "epoch": 4.349093207394793, "grad_norm": 0.053544058196758115, "learning_rate": 1.0496921344108412e-05, "loss": 0.8453, "step": 247720 }, { "epoch": 4.349268772274794, "grad_norm": 0.05876012443051975, "learning_rate": 1.0492405120943513e-05, "loss": 0.8377, "step": 247730 }, { "epoch": 4.349444337154796, "grad_norm": 0.04874382382400339, "learning_rate": 1.0487890044527136e-05, "loss": 0.833, "step": 247740 }, { "epoch": 4.349619902034797, "grad_norm": 0.04688489029225544, "learning_rate": 1.0483376114916587e-05, "loss": 0.8341, "step": 247750 }, { "epoch": 4.349795466914799, "grad_norm": 0.057506881517610936, "learning_rate": 1.0478863332168984e-05, "loss": 0.8252, "step": 247760 }, { "epoch": 4.3499710317948, "grad_norm": 0.046669235543395665, "learning_rate": 1.0474351696341539e-05, "loss": 0.8383, "step": 247770 }, { "epoch": 4.350146596674801, "grad_norm": 0.060049167073318636, "learning_rate": 1.0469841207491413e-05, "loss": 0.8389, "step": 247780 }, { "epoch": 4.350322161554803, "grad_norm": 0.052804423134517005, "learning_rate": 1.0465331865675745e-05, "loss": 0.8443, "step": 247790 }, { "epoch": 4.350497726434804, "grad_norm": 0.05624406797707193, "learning_rate": 1.046082367095168e-05, "loss": 0.8341, "step": 247800 }, { "epoch": 4.350673291314806, "grad_norm": 0.05996664596104661, "learning_rate": 1.0456316623376309e-05, "loss": 0.8291, "step": 247810 }, { "epoch": 4.350848856194807, "grad_norm": 0.11346253769494401, "learning_rate": 1.0451810723006778e-05, "loss": 0.8413, "step": 247820 }, { "epoch": 4.351024421074809, "grad_norm": 0.047423873788799385, "learning_rate": 1.0447305969900151e-05, "loss": 0.8403, "step": 247830 }, { "epoch": 4.35119998595481, "grad_norm": 0.053328702500530234, "learning_rate": 1.0442802364113502e-05, "loss": 0.8389, "step": 247840 }, { "epoch": 4.351375550834811, "grad_norm": 0.04887173842295668, "learning_rate": 1.0438299905703926e-05, "loss": 0.8326, "step": 247850 }, { "epoch": 4.351551115714813, "grad_norm": 0.052764923990648485, "learning_rate": 1.0433798594728434e-05, "loss": 0.8326, "step": 247860 }, { "epoch": 4.351726680594814, "grad_norm": 0.08116676628433284, "learning_rate": 1.0429298431244062e-05, "loss": 0.8396, "step": 247870 }, { "epoch": 4.3519022454748155, "grad_norm": 0.06662988678372961, "learning_rate": 1.0424799415307865e-05, "loss": 0.837, "step": 247880 }, { "epoch": 4.352077810354817, "grad_norm": 0.04847472515560618, "learning_rate": 1.0420301546976793e-05, "loss": 0.8461, "step": 247890 }, { "epoch": 4.3522533752348185, "grad_norm": 0.05229557450522811, "learning_rate": 1.041580482630788e-05, "loss": 0.8443, "step": 247900 }, { "epoch": 4.3524289401148195, "grad_norm": 0.06124256201457836, "learning_rate": 1.0411309253358042e-05, "loss": 0.8368, "step": 247910 }, { "epoch": 4.3526045049948205, "grad_norm": 0.04888830399024857, "learning_rate": 1.0406814828184308e-05, "loss": 0.8389, "step": 247920 }, { "epoch": 4.3527800698748225, "grad_norm": 0.05183078901593183, "learning_rate": 1.0402321550843589e-05, "loss": 0.8336, "step": 247930 }, { "epoch": 4.3529556347548235, "grad_norm": 0.05587531757395444, "learning_rate": 1.0397829421392808e-05, "loss": 0.8374, "step": 247940 }, { "epoch": 4.353131199634825, "grad_norm": 0.05771440425391081, "learning_rate": 1.039333843988891e-05, "loss": 0.8399, "step": 247950 }, { "epoch": 4.3533067645148265, "grad_norm": 0.048295444853711164, "learning_rate": 1.0388848606388765e-05, "loss": 0.8376, "step": 247960 }, { "epoch": 4.3534823293948275, "grad_norm": 0.046846489367724856, "learning_rate": 1.0384359920949282e-05, "loss": 0.8436, "step": 247970 }, { "epoch": 4.353657894274829, "grad_norm": 0.04807611438363218, "learning_rate": 1.0379872383627304e-05, "loss": 0.8401, "step": 247980 }, { "epoch": 4.35383345915483, "grad_norm": 0.04629529238700462, "learning_rate": 1.0375385994479733e-05, "loss": 0.8364, "step": 247990 }, { "epoch": 4.354009024034832, "grad_norm": 0.06460084136640042, "learning_rate": 1.0370900753563381e-05, "loss": 0.8314, "step": 248000 }, { "epoch": 4.354184588914833, "grad_norm": 0.06727189488232657, "learning_rate": 1.036641666093508e-05, "loss": 0.8343, "step": 248010 }, { "epoch": 4.354360153794834, "grad_norm": 0.0700292966412189, "learning_rate": 1.0361933716651673e-05, "loss": 0.8394, "step": 248020 }, { "epoch": 4.354535718674836, "grad_norm": 0.04157539762181821, "learning_rate": 1.0357451920769911e-05, "loss": 0.8304, "step": 248030 }, { "epoch": 4.354711283554837, "grad_norm": 0.04812858393298939, "learning_rate": 1.035297127334661e-05, "loss": 0.8396, "step": 248040 }, { "epoch": 4.354886848434839, "grad_norm": 0.12660719170533888, "learning_rate": 1.0348491774438557e-05, "loss": 0.8383, "step": 248050 }, { "epoch": 4.35506241331484, "grad_norm": 0.04675807454898014, "learning_rate": 1.0344013424102458e-05, "loss": 0.8359, "step": 248060 }, { "epoch": 4.355237978194842, "grad_norm": 0.040414460646019106, "learning_rate": 1.0339536222395105e-05, "loss": 0.8352, "step": 248070 }, { "epoch": 4.355413543074843, "grad_norm": 0.05139404357536068, "learning_rate": 1.0335060169373168e-05, "loss": 0.8379, "step": 248080 }, { "epoch": 4.355589107954844, "grad_norm": 0.04118243034615932, "learning_rate": 1.0330585265093436e-05, "loss": 0.8433, "step": 248090 }, { "epoch": 4.355764672834846, "grad_norm": 0.04634328061899146, "learning_rate": 1.032611150961254e-05, "loss": 0.8337, "step": 248100 }, { "epoch": 4.355940237714847, "grad_norm": 0.05989227479439736, "learning_rate": 1.032163890298719e-05, "loss": 0.8354, "step": 248110 }, { "epoch": 4.356115802594849, "grad_norm": 0.04273307687358923, "learning_rate": 1.0317167445274075e-05, "loss": 0.8463, "step": 248120 }, { "epoch": 4.35629136747485, "grad_norm": 0.06927850967946549, "learning_rate": 1.0312697136529814e-05, "loss": 0.8343, "step": 248130 }, { "epoch": 4.356466932354852, "grad_norm": 0.05196514140429685, "learning_rate": 1.030822797681107e-05, "loss": 0.8389, "step": 248140 }, { "epoch": 4.356642497234853, "grad_norm": 0.07622986650535693, "learning_rate": 1.0303759966174425e-05, "loss": 0.8342, "step": 248150 }, { "epoch": 4.356818062114854, "grad_norm": 0.05678150618857258, "learning_rate": 1.0299293104676553e-05, "loss": 0.8366, "step": 248160 }, { "epoch": 4.356993626994856, "grad_norm": 0.04706023335039374, "learning_rate": 1.029482739237401e-05, "loss": 0.8375, "step": 248170 }, { "epoch": 4.357169191874857, "grad_norm": 0.04902464985652531, "learning_rate": 1.029036282932339e-05, "loss": 0.8413, "step": 248180 }, { "epoch": 4.357344756754859, "grad_norm": 0.04699657898456082, "learning_rate": 1.0285899415581269e-05, "loss": 0.8436, "step": 248190 }, { "epoch": 4.35752032163486, "grad_norm": 0.0511597232892209, "learning_rate": 1.028143715120417e-05, "loss": 0.8408, "step": 248200 }, { "epoch": 4.357695886514861, "grad_norm": 0.06183189334610382, "learning_rate": 1.0276976036248646e-05, "loss": 0.8401, "step": 248210 }, { "epoch": 4.357871451394863, "grad_norm": 0.05335398428608222, "learning_rate": 1.0272516070771245e-05, "loss": 0.8372, "step": 248220 }, { "epoch": 4.358047016274864, "grad_norm": 0.05237487927704034, "learning_rate": 1.0268057254828442e-05, "loss": 0.8373, "step": 248230 }, { "epoch": 4.358222581154866, "grad_norm": 0.05315674556357546, "learning_rate": 1.026359958847675e-05, "loss": 0.8309, "step": 248240 }, { "epoch": 4.358398146034867, "grad_norm": 0.050284301206024645, "learning_rate": 1.0259143071772604e-05, "loss": 0.8424, "step": 248250 }, { "epoch": 4.358573710914869, "grad_norm": 0.05679402617691081, "learning_rate": 1.0254687704772545e-05, "loss": 0.8319, "step": 248260 }, { "epoch": 4.35874927579487, "grad_norm": 0.0511883134477251, "learning_rate": 1.0250233487532965e-05, "loss": 0.8408, "step": 248270 }, { "epoch": 4.358924840674871, "grad_norm": 0.053856124526375844, "learning_rate": 1.0245780420110323e-05, "loss": 0.835, "step": 248280 }, { "epoch": 4.359100405554873, "grad_norm": 0.04395722420969056, "learning_rate": 1.0241328502561051e-05, "loss": 0.8331, "step": 248290 }, { "epoch": 4.359275970434874, "grad_norm": 0.0485601728746751, "learning_rate": 1.023687773494153e-05, "loss": 0.839, "step": 248300 }, { "epoch": 4.359451535314876, "grad_norm": 0.05775614338668411, "learning_rate": 1.0232428117308168e-05, "loss": 0.8339, "step": 248310 }, { "epoch": 4.359627100194877, "grad_norm": 0.046382438993263095, "learning_rate": 1.0227979649717307e-05, "loss": 0.8343, "step": 248320 }, { "epoch": 4.359802665074878, "grad_norm": 0.06150496253714156, "learning_rate": 1.0223532332225374e-05, "loss": 0.8313, "step": 248330 }, { "epoch": 4.35997822995488, "grad_norm": 0.051111961672807335, "learning_rate": 1.0219086164888667e-05, "loss": 0.8376, "step": 248340 }, { "epoch": 4.360153794834881, "grad_norm": 0.049302046013316565, "learning_rate": 1.021464114776354e-05, "loss": 0.8358, "step": 248350 }, { "epoch": 4.360329359714883, "grad_norm": 0.05423480325895099, "learning_rate": 1.021019728090633e-05, "loss": 0.8312, "step": 248360 }, { "epoch": 4.360504924594884, "grad_norm": 0.05903042191828325, "learning_rate": 1.0205754564373293e-05, "loss": 0.8328, "step": 248370 }, { "epoch": 4.360680489474886, "grad_norm": 0.04636917884180159, "learning_rate": 1.0201312998220744e-05, "loss": 0.8341, "step": 248380 }, { "epoch": 4.360856054354887, "grad_norm": 0.04737016722973275, "learning_rate": 1.0196872582504987e-05, "loss": 0.8428, "step": 248390 }, { "epoch": 4.361031619234888, "grad_norm": 0.04217218441151889, "learning_rate": 1.0192433317282234e-05, "loss": 0.8387, "step": 248400 }, { "epoch": 4.36120718411489, "grad_norm": 0.04845909688459904, "learning_rate": 1.0187995202608778e-05, "loss": 0.8398, "step": 248410 }, { "epoch": 4.361382748994891, "grad_norm": 0.05869441688206428, "learning_rate": 1.018355823854079e-05, "loss": 0.833, "step": 248420 }, { "epoch": 4.361558313874893, "grad_norm": 0.052931981364087666, "learning_rate": 1.017912242513455e-05, "loss": 0.8417, "step": 248430 }, { "epoch": 4.361733878754894, "grad_norm": 0.058290062912634236, "learning_rate": 1.0174687762446228e-05, "loss": 0.8394, "step": 248440 }, { "epoch": 4.361909443634895, "grad_norm": 0.04433505543113629, "learning_rate": 1.0170254250532013e-05, "loss": 0.8453, "step": 248450 }, { "epoch": 4.362085008514897, "grad_norm": 0.048805727625056174, "learning_rate": 1.0165821889448108e-05, "loss": 0.8385, "step": 248460 }, { "epoch": 4.362260573394898, "grad_norm": 0.06040740629445998, "learning_rate": 1.0161390679250615e-05, "loss": 0.8442, "step": 248470 }, { "epoch": 4.3624361382749, "grad_norm": 0.05069631021974266, "learning_rate": 1.0156960619995747e-05, "loss": 0.844, "step": 248480 }, { "epoch": 4.362611703154901, "grad_norm": 0.05437775903719531, "learning_rate": 1.0152531711739552e-05, "loss": 0.8495, "step": 248490 }, { "epoch": 4.362787268034903, "grad_norm": 0.054323861051807615, "learning_rate": 1.0148103954538232e-05, "loss": 0.8406, "step": 248500 }, { "epoch": 4.362962832914904, "grad_norm": 0.05374788573138218, "learning_rate": 1.0143677348447828e-05, "loss": 0.8343, "step": 248510 }, { "epoch": 4.363138397794905, "grad_norm": 0.04888992673500921, "learning_rate": 1.0139251893524443e-05, "loss": 0.8411, "step": 248520 }, { "epoch": 4.363313962674907, "grad_norm": 0.05397941825256283, "learning_rate": 1.0134827589824156e-05, "loss": 0.8484, "step": 248530 }, { "epoch": 4.363489527554908, "grad_norm": 0.04349010104439979, "learning_rate": 1.0130404437403013e-05, "loss": 0.8368, "step": 248540 }, { "epoch": 4.36366509243491, "grad_norm": 0.04430518502816031, "learning_rate": 1.012598243631707e-05, "loss": 0.8357, "step": 248550 }, { "epoch": 4.363840657314911, "grad_norm": 0.05410065528765288, "learning_rate": 1.0121561586622344e-05, "loss": 0.8419, "step": 248560 }, { "epoch": 4.364016222194913, "grad_norm": 0.04958426891636123, "learning_rate": 1.011714188837484e-05, "loss": 0.8383, "step": 248570 }, { "epoch": 4.364191787074914, "grad_norm": 0.04845747155118376, "learning_rate": 1.011272334163059e-05, "loss": 0.831, "step": 248580 }, { "epoch": 4.364367351954915, "grad_norm": 0.05652436297000436, "learning_rate": 1.0108305946445504e-05, "loss": 0.8368, "step": 248590 }, { "epoch": 4.364542916834917, "grad_norm": 0.043899562235199266, "learning_rate": 1.010388970287564e-05, "loss": 0.837, "step": 248600 }, { "epoch": 4.364718481714918, "grad_norm": 0.05382468631558322, "learning_rate": 1.00994746109769e-05, "loss": 0.8326, "step": 248610 }, { "epoch": 4.3648940465949195, "grad_norm": 0.04789280842271871, "learning_rate": 1.009506067080524e-05, "loss": 0.8387, "step": 248620 }, { "epoch": 4.365069611474921, "grad_norm": 0.06411270570866912, "learning_rate": 1.0090647882416602e-05, "loss": 0.8343, "step": 248630 }, { "epoch": 4.365245176354922, "grad_norm": 0.06358708809467144, "learning_rate": 1.0086236245866856e-05, "loss": 0.8374, "step": 248640 }, { "epoch": 4.3654207412349235, "grad_norm": 0.054522354527278584, "learning_rate": 1.0081825761211936e-05, "loss": 0.8438, "step": 248650 }, { "epoch": 4.3655963061149246, "grad_norm": 0.049164295645291844, "learning_rate": 1.007741642850768e-05, "loss": 0.8367, "step": 248660 }, { "epoch": 4.3657718709949265, "grad_norm": 0.053033872848738325, "learning_rate": 1.0073008247810018e-05, "loss": 0.8403, "step": 248670 }, { "epoch": 4.3659474358749275, "grad_norm": 0.07118601328830425, "learning_rate": 1.0068601219174745e-05, "loss": 0.8414, "step": 248680 }, { "epoch": 4.3661230007549285, "grad_norm": 0.05198133701618133, "learning_rate": 1.0064195342657715e-05, "loss": 0.842, "step": 248690 }, { "epoch": 4.3662985656349305, "grad_norm": 0.05881656206127965, "learning_rate": 1.0059790618314787e-05, "loss": 0.8382, "step": 248700 }, { "epoch": 4.3664741305149315, "grad_norm": 0.04899867239000686, "learning_rate": 1.0055387046201722e-05, "loss": 0.8411, "step": 248710 }, { "epoch": 4.366649695394933, "grad_norm": 0.04678959433178068, "learning_rate": 1.005098462637433e-05, "loss": 0.8414, "step": 248720 }, { "epoch": 4.3668252602749345, "grad_norm": 0.04902744814546622, "learning_rate": 1.0046583358888403e-05, "loss": 0.8451, "step": 248730 }, { "epoch": 4.367000825154936, "grad_norm": 0.04503327431226676, "learning_rate": 1.0042183243799688e-05, "loss": 0.8445, "step": 248740 }, { "epoch": 4.367176390034937, "grad_norm": 0.05532853523455059, "learning_rate": 1.0037784281163956e-05, "loss": 0.8428, "step": 248750 }, { "epoch": 4.367351954914938, "grad_norm": 0.05400787769014786, "learning_rate": 1.0033386471036899e-05, "loss": 0.8427, "step": 248760 }, { "epoch": 4.36752751979494, "grad_norm": 0.06659401838344604, "learning_rate": 1.0028989813474307e-05, "loss": 0.8386, "step": 248770 }, { "epoch": 4.367703084674941, "grad_norm": 0.04301939148825867, "learning_rate": 1.0024594308531818e-05, "loss": 0.8375, "step": 248780 }, { "epoch": 4.367878649554943, "grad_norm": 0.05529329513610902, "learning_rate": 1.002019995626517e-05, "loss": 0.8394, "step": 248790 }, { "epoch": 4.368054214434944, "grad_norm": 0.046920977119141206, "learning_rate": 1.001580675673003e-05, "loss": 0.8422, "step": 248800 }, { "epoch": 4.368229779314946, "grad_norm": 0.0526843885764775, "learning_rate": 1.001141470998204e-05, "loss": 0.8442, "step": 248810 }, { "epoch": 4.368405344194947, "grad_norm": 0.06517069551379079, "learning_rate": 1.0007023816076889e-05, "loss": 0.828, "step": 248820 }, { "epoch": 4.368580909074948, "grad_norm": 0.04272013999133828, "learning_rate": 1.000263407507014e-05, "loss": 0.8353, "step": 248830 }, { "epoch": 4.36875647395495, "grad_norm": 0.04994292946155752, "learning_rate": 9.998245487017505e-06, "loss": 0.8293, "step": 248840 }, { "epoch": 4.368932038834951, "grad_norm": 0.05037269718392068, "learning_rate": 9.993858051974512e-06, "loss": 0.8424, "step": 248850 }, { "epoch": 4.369107603714953, "grad_norm": 0.05508911553231758, "learning_rate": 9.989471769996778e-06, "loss": 0.8376, "step": 248860 }, { "epoch": 4.369283168594954, "grad_norm": 0.047645231762443466, "learning_rate": 9.985086641139896e-06, "loss": 0.836, "step": 248870 }, { "epoch": 4.369458733474955, "grad_norm": 0.05125741602709451, "learning_rate": 9.980702665459405e-06, "loss": 0.8353, "step": 248880 }, { "epoch": 4.369634298354957, "grad_norm": 0.045805949071411625, "learning_rate": 9.976319843010842e-06, "loss": 0.8343, "step": 248890 }, { "epoch": 4.369809863234958, "grad_norm": 0.047410626761658595, "learning_rate": 9.971938173849771e-06, "loss": 0.8461, "step": 248900 }, { "epoch": 4.36998542811496, "grad_norm": 0.060443327849524595, "learning_rate": 9.96755765803167e-06, "loss": 0.8448, "step": 248910 }, { "epoch": 4.370160992994961, "grad_norm": 0.06761175278998749, "learning_rate": 9.963178295612082e-06, "loss": 0.841, "step": 248920 }, { "epoch": 4.370336557874962, "grad_norm": 0.050922801872098605, "learning_rate": 9.958800086646433e-06, "loss": 0.8381, "step": 248930 }, { "epoch": 4.370512122754964, "grad_norm": 0.057496415112045014, "learning_rate": 9.954423031190278e-06, "loss": 0.8409, "step": 248940 }, { "epoch": 4.370687687634965, "grad_norm": 0.05042822394517842, "learning_rate": 9.950047129299003e-06, "loss": 0.8375, "step": 248950 }, { "epoch": 4.370863252514967, "grad_norm": 0.04094578052307378, "learning_rate": 9.945672381028078e-06, "loss": 0.8334, "step": 248960 }, { "epoch": 4.371038817394968, "grad_norm": 0.05495139728502668, "learning_rate": 9.941298786432961e-06, "loss": 0.837, "step": 248970 }, { "epoch": 4.37121438227497, "grad_norm": 0.05547866299881619, "learning_rate": 9.936926345569028e-06, "loss": 0.8314, "step": 248980 }, { "epoch": 4.371389947154971, "grad_norm": 0.04385621453952655, "learning_rate": 9.932555058491693e-06, "loss": 0.835, "step": 248990 }, { "epoch": 4.371565512034972, "grad_norm": 0.05123200790157125, "learning_rate": 9.928184925256315e-06, "loss": 0.8362, "step": 249000 }, { "epoch": 4.371741076914974, "grad_norm": 0.050349521462646867, "learning_rate": 9.923815945918314e-06, "loss": 0.8261, "step": 249010 }, { "epoch": 4.371916641794975, "grad_norm": 0.07386852341109845, "learning_rate": 9.919448120533012e-06, "loss": 0.8304, "step": 249020 }, { "epoch": 4.372092206674977, "grad_norm": 0.05990999491210293, "learning_rate": 9.91508144915575e-06, "loss": 0.8415, "step": 249030 }, { "epoch": 4.372267771554978, "grad_norm": 0.07068731608349914, "learning_rate": 9.91071593184188e-06, "loss": 0.8414, "step": 249040 }, { "epoch": 4.37244333643498, "grad_norm": 0.04388061970148544, "learning_rate": 9.90635156864668e-06, "loss": 0.8357, "step": 249050 }, { "epoch": 4.372618901314981, "grad_norm": 0.04469423030665372, "learning_rate": 9.901988359625471e-06, "loss": 0.8416, "step": 249060 }, { "epoch": 4.372794466194982, "grad_norm": 0.05612687730248978, "learning_rate": 9.897626304833538e-06, "loss": 0.8405, "step": 249070 }, { "epoch": 4.372970031074984, "grad_norm": 0.05309558548799841, "learning_rate": 9.893265404326124e-06, "loss": 0.8376, "step": 249080 }, { "epoch": 4.373145595954985, "grad_norm": 0.053504712179967155, "learning_rate": 9.888905658158511e-06, "loss": 0.8381, "step": 249090 }, { "epoch": 4.373321160834987, "grad_norm": 0.04677908749013473, "learning_rate": 9.884547066385896e-06, "loss": 0.8411, "step": 249100 }, { "epoch": 4.373496725714988, "grad_norm": 0.04711099364470261, "learning_rate": 9.880189629063562e-06, "loss": 0.8338, "step": 249110 }, { "epoch": 4.373672290594989, "grad_norm": 0.05092884572323222, "learning_rate": 9.875833346246677e-06, "loss": 0.8414, "step": 249120 }, { "epoch": 4.373847855474991, "grad_norm": 0.04188982661386521, "learning_rate": 9.871478217990446e-06, "loss": 0.836, "step": 249130 }, { "epoch": 4.374023420354992, "grad_norm": 0.06602741312002793, "learning_rate": 9.867124244350059e-06, "loss": 0.8491, "step": 249140 }, { "epoch": 4.374198985234994, "grad_norm": 0.045975871553402055, "learning_rate": 9.86277142538067e-06, "loss": 0.8343, "step": 249150 }, { "epoch": 4.374374550114995, "grad_norm": 0.050867968533257334, "learning_rate": 9.85841976113744e-06, "loss": 0.8424, "step": 249160 }, { "epoch": 4.374550114994997, "grad_norm": 0.04929012442184749, "learning_rate": 9.854069251675464e-06, "loss": 0.8328, "step": 249170 }, { "epoch": 4.374725679874998, "grad_norm": 0.04484563046332468, "learning_rate": 9.849719897049941e-06, "loss": 0.8492, "step": 249180 }, { "epoch": 4.374901244754999, "grad_norm": 0.04541211533379785, "learning_rate": 9.845371697315911e-06, "loss": 0.8418, "step": 249190 }, { "epoch": 4.375076809635001, "grad_norm": 0.05675361950669918, "learning_rate": 9.841024652528494e-06, "loss": 0.8408, "step": 249200 }, { "epoch": 4.375252374515002, "grad_norm": 0.05159887359531993, "learning_rate": 9.836678762742791e-06, "loss": 0.8408, "step": 249210 }, { "epoch": 4.375427939395004, "grad_norm": 0.0580475837877503, "learning_rate": 9.832334028013806e-06, "loss": 0.8351, "step": 249220 }, { "epoch": 4.375603504275005, "grad_norm": 0.04693996391331549, "learning_rate": 9.827990448396632e-06, "loss": 0.8396, "step": 249230 }, { "epoch": 4.375779069155007, "grad_norm": 0.04810508873558979, "learning_rate": 9.823648023946302e-06, "loss": 0.8366, "step": 249240 }, { "epoch": 4.375954634035008, "grad_norm": 0.06942495450433706, "learning_rate": 9.819306754717817e-06, "loss": 0.8455, "step": 249250 }, { "epoch": 4.376130198915009, "grad_norm": 0.06797876989431985, "learning_rate": 9.814966640766205e-06, "loss": 0.8299, "step": 249260 }, { "epoch": 4.376305763795011, "grad_norm": 0.060183940164738096, "learning_rate": 9.810627682146406e-06, "loss": 0.8434, "step": 249270 }, { "epoch": 4.376481328675012, "grad_norm": 0.04382848299436312, "learning_rate": 9.806289878913458e-06, "loss": 0.8412, "step": 249280 }, { "epoch": 4.376656893555014, "grad_norm": 0.04803939193556529, "learning_rate": 9.801953231122291e-06, "loss": 0.8389, "step": 249290 }, { "epoch": 4.376832458435015, "grad_norm": 0.05107956372197884, "learning_rate": 9.797617738827843e-06, "loss": 0.8418, "step": 249300 }, { "epoch": 4.377008023315016, "grad_norm": 0.056996064877260666, "learning_rate": 9.793283402085073e-06, "loss": 0.846, "step": 249310 }, { "epoch": 4.377183588195018, "grad_norm": 0.05106064550309327, "learning_rate": 9.788950220948875e-06, "loss": 0.839, "step": 249320 }, { "epoch": 4.377359153075019, "grad_norm": 0.05365400363357463, "learning_rate": 9.784618195474166e-06, "loss": 0.8419, "step": 249330 }, { "epoch": 4.377534717955021, "grad_norm": 0.05413463702819866, "learning_rate": 9.780287325715806e-06, "loss": 0.8316, "step": 249340 }, { "epoch": 4.377710282835022, "grad_norm": 0.04880922330598473, "learning_rate": 9.775957611728709e-06, "loss": 0.8359, "step": 249350 }, { "epoch": 4.377885847715023, "grad_norm": 0.04819266191724967, "learning_rate": 9.771629053567694e-06, "loss": 0.8307, "step": 249360 }, { "epoch": 4.378061412595025, "grad_norm": 0.048593549884073106, "learning_rate": 9.767301651287633e-06, "loss": 0.8374, "step": 249370 }, { "epoch": 4.378236977475026, "grad_norm": 0.04149940613072813, "learning_rate": 9.762975404943348e-06, "loss": 0.8425, "step": 249380 }, { "epoch": 4.3784125423550275, "grad_norm": 0.0451300986216177, "learning_rate": 9.758650314589643e-06, "loss": 0.838, "step": 249390 }, { "epoch": 4.378588107235029, "grad_norm": 0.06260368552253591, "learning_rate": 9.75432638028132e-06, "loss": 0.8446, "step": 249400 }, { "epoch": 4.3787636721150305, "grad_norm": 0.04558345513743233, "learning_rate": 9.75000360207319e-06, "loss": 0.8378, "step": 249410 }, { "epoch": 4.3789392369950315, "grad_norm": 0.0638939995086517, "learning_rate": 9.745681980019983e-06, "loss": 0.8321, "step": 249420 }, { "epoch": 4.3791148018750325, "grad_norm": 0.04574029824322572, "learning_rate": 9.741361514176495e-06, "loss": 0.8454, "step": 249430 }, { "epoch": 4.3792903667550345, "grad_norm": 0.04656280552222453, "learning_rate": 9.73704220459741e-06, "loss": 0.838, "step": 249440 }, { "epoch": 4.3794659316350355, "grad_norm": 0.04606637910787882, "learning_rate": 9.732724051337522e-06, "loss": 0.8466, "step": 249450 }, { "epoch": 4.379641496515037, "grad_norm": 0.046618406022645525, "learning_rate": 9.728407054451497e-06, "loss": 0.8379, "step": 249460 }, { "epoch": 4.3798170613950385, "grad_norm": 0.05279606552166028, "learning_rate": 9.72409121399404e-06, "loss": 0.8294, "step": 249470 }, { "epoch": 4.37999262627504, "grad_norm": 0.04281514694205284, "learning_rate": 9.719776530019864e-06, "loss": 0.8427, "step": 249480 }, { "epoch": 4.380168191155041, "grad_norm": 0.0463030553260527, "learning_rate": 9.7154630025836e-06, "loss": 0.8373, "step": 249490 }, { "epoch": 4.3803437560350424, "grad_norm": 0.05282623071838824, "learning_rate": 9.711150631739921e-06, "loss": 0.8435, "step": 249500 }, { "epoch": 4.380519320915044, "grad_norm": 0.04986738699618986, "learning_rate": 9.70683941754344e-06, "loss": 0.8373, "step": 249510 }, { "epoch": 4.380694885795045, "grad_norm": 0.053025171455297775, "learning_rate": 9.70252936004882e-06, "loss": 0.8427, "step": 249520 }, { "epoch": 4.380870450675047, "grad_norm": 0.0476116917242871, "learning_rate": 9.698220459310639e-06, "loss": 0.8423, "step": 249530 }, { "epoch": 4.381046015555048, "grad_norm": 0.06064420330627581, "learning_rate": 9.693912715383497e-06, "loss": 0.8329, "step": 249540 }, { "epoch": 4.381221580435049, "grad_norm": 0.05544931057352584, "learning_rate": 9.68960612832201e-06, "loss": 0.8339, "step": 249550 }, { "epoch": 4.381397145315051, "grad_norm": 0.04767928425932525, "learning_rate": 9.685300698180683e-06, "loss": 0.8259, "step": 249560 }, { "epoch": 4.381572710195052, "grad_norm": 0.06326129435367762, "learning_rate": 9.680996425014097e-06, "loss": 0.8384, "step": 249570 }, { "epoch": 4.381748275075054, "grad_norm": 0.05109216377911851, "learning_rate": 9.676693308876806e-06, "loss": 0.8302, "step": 249580 }, { "epoch": 4.381923839955055, "grad_norm": 0.046966653559451677, "learning_rate": 9.6723913498233e-06, "loss": 0.8293, "step": 249590 }, { "epoch": 4.382099404835056, "grad_norm": 0.050840743978369934, "learning_rate": 9.668090547908113e-06, "loss": 0.8381, "step": 249600 }, { "epoch": 4.382274969715058, "grad_norm": 0.07286403870882209, "learning_rate": 9.663790903185685e-06, "loss": 0.8335, "step": 249610 }, { "epoch": 4.382450534595059, "grad_norm": 0.0566738180969571, "learning_rate": 9.659492415710575e-06, "loss": 0.8358, "step": 249620 }, { "epoch": 4.382626099475061, "grad_norm": 0.04873184728229524, "learning_rate": 9.655195085537172e-06, "loss": 0.8385, "step": 249630 }, { "epoch": 4.382801664355062, "grad_norm": 0.043052134553500696, "learning_rate": 9.65089891271996e-06, "loss": 0.8362, "step": 249640 }, { "epoch": 4.382977229235064, "grad_norm": 0.04649703934174705, "learning_rate": 9.646603897313385e-06, "loss": 0.8407, "step": 249650 }, { "epoch": 4.383152794115065, "grad_norm": 0.057107164217081036, "learning_rate": 9.642310039371824e-06, "loss": 0.8418, "step": 249660 }, { "epoch": 4.383328358995066, "grad_norm": 0.05558345502075509, "learning_rate": 9.63801733894972e-06, "loss": 0.8369, "step": 249670 }, { "epoch": 4.383503923875068, "grad_norm": 0.050291869535012995, "learning_rate": 9.633725796101423e-06, "loss": 0.8396, "step": 249680 }, { "epoch": 4.383679488755069, "grad_norm": 0.05659088899670341, "learning_rate": 9.629435410881351e-06, "loss": 0.8342, "step": 249690 }, { "epoch": 4.383855053635071, "grad_norm": 0.05133232817966617, "learning_rate": 9.625146183343837e-06, "loss": 0.8363, "step": 249700 }, { "epoch": 4.384030618515072, "grad_norm": 0.06246089536617769, "learning_rate": 9.620858113543226e-06, "loss": 0.838, "step": 249710 }, { "epoch": 4.384206183395074, "grad_norm": 0.05221195431082349, "learning_rate": 9.616571201533874e-06, "loss": 0.8352, "step": 249720 }, { "epoch": 4.384381748275075, "grad_norm": 0.06369491495788297, "learning_rate": 9.612285447370057e-06, "loss": 0.8385, "step": 249730 }, { "epoch": 4.384557313155076, "grad_norm": 0.05931581284978873, "learning_rate": 9.608000851106097e-06, "loss": 0.8391, "step": 249740 }, { "epoch": 4.384732878035078, "grad_norm": 0.04336253270558309, "learning_rate": 9.603717412796308e-06, "loss": 0.8364, "step": 249750 }, { "epoch": 4.384908442915079, "grad_norm": 0.05440838234697337, "learning_rate": 9.59943513249491e-06, "loss": 0.8357, "step": 249760 }, { "epoch": 4.385084007795081, "grad_norm": 0.04820377421643382, "learning_rate": 9.59515401025621e-06, "loss": 0.8388, "step": 249770 }, { "epoch": 4.385259572675082, "grad_norm": 0.06918884086134826, "learning_rate": 9.59087404613439e-06, "loss": 0.8397, "step": 249780 }, { "epoch": 4.385435137555083, "grad_norm": 0.047684505913767765, "learning_rate": 9.586595240183741e-06, "loss": 0.8373, "step": 249790 }, { "epoch": 4.385610702435085, "grad_norm": 0.05632367136069801, "learning_rate": 9.582317592458448e-06, "loss": 0.8372, "step": 249800 }, { "epoch": 4.385786267315086, "grad_norm": 0.06549391167029689, "learning_rate": 9.578041103012694e-06, "loss": 0.8414, "step": 249810 }, { "epoch": 4.385961832195088, "grad_norm": 0.05572149662692996, "learning_rate": 9.573765771900705e-06, "loss": 0.8339, "step": 249820 }, { "epoch": 4.386137397075089, "grad_norm": 0.05104797188395551, "learning_rate": 9.569491599176608e-06, "loss": 0.8362, "step": 249830 }, { "epoch": 4.386312961955091, "grad_norm": 0.05234409930026774, "learning_rate": 9.565218584894592e-06, "loss": 0.8409, "step": 249840 }, { "epoch": 4.386488526835092, "grad_norm": 0.04791358172149713, "learning_rate": 9.560946729108744e-06, "loss": 0.838, "step": 249850 }, { "epoch": 4.386664091715093, "grad_norm": 0.07210082493946582, "learning_rate": 9.556676031873262e-06, "loss": 0.8429, "step": 249860 }, { "epoch": 4.386839656595095, "grad_norm": 0.05184235710627151, "learning_rate": 9.552406493242212e-06, "loss": 0.8447, "step": 249870 }, { "epoch": 4.387015221475096, "grad_norm": 0.051841697607396764, "learning_rate": 9.54813811326968e-06, "loss": 0.8431, "step": 249880 }, { "epoch": 4.387190786355098, "grad_norm": 0.048820849151274055, "learning_rate": 9.543870892009787e-06, "loss": 0.8397, "step": 249890 }, { "epoch": 4.387366351235099, "grad_norm": 0.06741945580192654, "learning_rate": 9.539604829516569e-06, "loss": 0.8404, "step": 249900 }, { "epoch": 4.387541916115101, "grad_norm": 0.05035439713417252, "learning_rate": 9.535339925844078e-06, "loss": 0.8351, "step": 249910 }, { "epoch": 4.387717480995102, "grad_norm": 0.0749932983915975, "learning_rate": 9.531076181046378e-06, "loss": 0.8403, "step": 249920 }, { "epoch": 4.387893045875103, "grad_norm": 0.05448498126717617, "learning_rate": 9.52681359517746e-06, "loss": 0.8346, "step": 249930 }, { "epoch": 4.388068610755105, "grad_norm": 0.05890090855179807, "learning_rate": 9.52255216829136e-06, "loss": 0.8378, "step": 249940 }, { "epoch": 4.388244175635106, "grad_norm": 0.054794255058811824, "learning_rate": 9.518291900442017e-06, "loss": 0.8434, "step": 249950 }, { "epoch": 4.388419740515108, "grad_norm": 0.061555507450643705, "learning_rate": 9.514032791683478e-06, "loss": 0.8424, "step": 249960 }, { "epoch": 4.388595305395109, "grad_norm": 0.04701596096427494, "learning_rate": 9.509774842069674e-06, "loss": 0.8375, "step": 249970 }, { "epoch": 4.38877087027511, "grad_norm": 0.06811457420020534, "learning_rate": 9.505518051654544e-06, "loss": 0.8348, "step": 249980 }, { "epoch": 4.388946435155112, "grad_norm": 0.05331561342773622, "learning_rate": 9.50126242049206e-06, "loss": 0.841, "step": 249990 }, { "epoch": 4.389122000035113, "grad_norm": 0.05417008611807753, "learning_rate": 9.497007948636098e-06, "loss": 0.8327, "step": 250000 }, { "epoch": 4.389297564915115, "grad_norm": 0.042284562515361, "learning_rate": 9.49275463614059e-06, "loss": 0.8436, "step": 250010 }, { "epoch": 4.389473129795116, "grad_norm": 0.0814357559531662, "learning_rate": 9.488502483059398e-06, "loss": 0.8288, "step": 250020 }, { "epoch": 4.389648694675117, "grad_norm": 0.05009718484998311, "learning_rate": 9.484251489446441e-06, "loss": 0.8353, "step": 250030 }, { "epoch": 4.389824259555119, "grad_norm": 0.05803697273643302, "learning_rate": 9.480001655355541e-06, "loss": 0.8357, "step": 250040 }, { "epoch": 4.38999982443512, "grad_norm": 0.06157386359891623, "learning_rate": 9.475752980840563e-06, "loss": 0.8344, "step": 250050 }, { "epoch": 4.390175389315122, "grad_norm": 0.04522178356332587, "learning_rate": 9.471505465955351e-06, "loss": 0.8406, "step": 250060 }, { "epoch": 4.390350954195123, "grad_norm": 0.06825843813453555, "learning_rate": 9.467259110753691e-06, "loss": 0.8302, "step": 250070 }, { "epoch": 4.390526519075125, "grad_norm": 0.046538173268722685, "learning_rate": 9.463013915289401e-06, "loss": 0.8406, "step": 250080 }, { "epoch": 4.390702083955126, "grad_norm": 0.051211949185671395, "learning_rate": 9.458769879616275e-06, "loss": 0.842, "step": 250090 }, { "epoch": 4.390877648835127, "grad_norm": 0.06167265185366056, "learning_rate": 9.45452700378807e-06, "loss": 0.8318, "step": 250100 }, { "epoch": 4.391053213715129, "grad_norm": 0.09607699682312983, "learning_rate": 9.450285287858576e-06, "loss": 0.8307, "step": 250110 }, { "epoch": 4.39122877859513, "grad_norm": 0.05541184409244226, "learning_rate": 9.446044731881465e-06, "loss": 0.8443, "step": 250120 }, { "epoch": 4.3914043434751315, "grad_norm": 0.0483929692292693, "learning_rate": 9.441805335910562e-06, "loss": 0.8359, "step": 250130 }, { "epoch": 4.391579908355133, "grad_norm": 0.04770473435787869, "learning_rate": 9.437567099999497e-06, "loss": 0.8353, "step": 250140 }, { "epoch": 4.3917554732351345, "grad_norm": 0.06482300047564389, "learning_rate": 9.433330024202017e-06, "loss": 0.8427, "step": 250150 }, { "epoch": 4.3919310381151355, "grad_norm": 0.0786678969411667, "learning_rate": 9.429094108571815e-06, "loss": 0.834, "step": 250160 }, { "epoch": 4.3921066029951366, "grad_norm": 0.05023753599923858, "learning_rate": 9.424859353162513e-06, "loss": 0.8391, "step": 250170 }, { "epoch": 4.3922821678751385, "grad_norm": 0.046284866609783475, "learning_rate": 9.42062575802781e-06, "loss": 0.8421, "step": 250180 }, { "epoch": 4.3924577327551395, "grad_norm": 0.05663136283114535, "learning_rate": 9.4163933232213e-06, "loss": 0.8349, "step": 250190 }, { "epoch": 4.392633297635141, "grad_norm": 0.05812924745765406, "learning_rate": 9.412162048796678e-06, "loss": 0.8354, "step": 250200 }, { "epoch": 4.3928088625151425, "grad_norm": 0.05997327533841313, "learning_rate": 9.407931934807487e-06, "loss": 0.8413, "step": 250210 }, { "epoch": 4.3929844273951435, "grad_norm": 0.047690855827685855, "learning_rate": 9.403702981307361e-06, "loss": 0.8432, "step": 250220 }, { "epoch": 4.393159992275145, "grad_norm": 0.048969100456241976, "learning_rate": 9.399475188349885e-06, "loss": 0.8467, "step": 250230 }, { "epoch": 4.3933355571551465, "grad_norm": 0.05398518551574716, "learning_rate": 9.395248555988604e-06, "loss": 0.8416, "step": 250240 }, { "epoch": 4.393511122035148, "grad_norm": 0.0534478227695198, "learning_rate": 9.391023084277076e-06, "loss": 0.8338, "step": 250250 }, { "epoch": 4.393686686915149, "grad_norm": 0.04650662607507607, "learning_rate": 9.386798773268857e-06, "loss": 0.8371, "step": 250260 }, { "epoch": 4.39386225179515, "grad_norm": 0.05409485482161247, "learning_rate": 9.382575623017436e-06, "loss": 0.8382, "step": 250270 }, { "epoch": 4.394037816675152, "grad_norm": 0.04247891006657534, "learning_rate": 9.378353633576358e-06, "loss": 0.8405, "step": 250280 }, { "epoch": 4.394213381555153, "grad_norm": 0.047471596658553224, "learning_rate": 9.374132804999067e-06, "loss": 0.8418, "step": 250290 }, { "epoch": 4.394388946435155, "grad_norm": 0.05309671804936861, "learning_rate": 9.369913137339108e-06, "loss": 0.8371, "step": 250300 }, { "epoch": 4.394564511315156, "grad_norm": 0.05831194080912037, "learning_rate": 9.365694630649888e-06, "loss": 0.8401, "step": 250310 }, { "epoch": 4.394740076195158, "grad_norm": 0.043197974061099195, "learning_rate": 9.36147728498489e-06, "loss": 0.8365, "step": 250320 }, { "epoch": 4.394915641075159, "grad_norm": 0.054614841436262426, "learning_rate": 9.357261100397546e-06, "loss": 0.8416, "step": 250330 }, { "epoch": 4.39509120595516, "grad_norm": 0.05874929162024638, "learning_rate": 9.353046076941247e-06, "loss": 0.8431, "step": 250340 }, { "epoch": 4.395266770835162, "grad_norm": 0.06289145956018707, "learning_rate": 9.348832214669442e-06, "loss": 0.8387, "step": 250350 }, { "epoch": 4.395442335715163, "grad_norm": 0.0792231000777063, "learning_rate": 9.344619513635467e-06, "loss": 0.8408, "step": 250360 }, { "epoch": 4.395617900595165, "grad_norm": 0.09119388050056848, "learning_rate": 9.340407973892754e-06, "loss": 0.8282, "step": 250370 }, { "epoch": 4.395793465475166, "grad_norm": 0.05664302502587843, "learning_rate": 9.336197595494625e-06, "loss": 0.8445, "step": 250380 }, { "epoch": 4.395969030355168, "grad_norm": 0.05840583862911107, "learning_rate": 9.331988378494433e-06, "loss": 0.8397, "step": 250390 }, { "epoch": 4.396144595235169, "grad_norm": 0.04790376198998712, "learning_rate": 9.327780322945547e-06, "loss": 0.8362, "step": 250400 }, { "epoch": 4.39632016011517, "grad_norm": 0.048447659804067206, "learning_rate": 9.323573428901222e-06, "loss": 0.8356, "step": 250410 }, { "epoch": 4.396495724995172, "grad_norm": 0.059187451335722494, "learning_rate": 9.319367696414796e-06, "loss": 0.8421, "step": 250420 }, { "epoch": 4.396671289875173, "grad_norm": 0.04054083854162119, "learning_rate": 9.315163125539569e-06, "loss": 0.8389, "step": 250430 }, { "epoch": 4.396846854755175, "grad_norm": 0.045853746624040084, "learning_rate": 9.310959716328783e-06, "loss": 0.8382, "step": 250440 }, { "epoch": 4.397022419635176, "grad_norm": 0.045449790375425986, "learning_rate": 9.30675746883573e-06, "loss": 0.8474, "step": 250450 }, { "epoch": 4.397197984515177, "grad_norm": 0.051347549599226876, "learning_rate": 9.302556383113586e-06, "loss": 0.8404, "step": 250460 }, { "epoch": 4.397373549395179, "grad_norm": 0.05587685825611571, "learning_rate": 9.298356459215674e-06, "loss": 0.8363, "step": 250470 }, { "epoch": 4.39754911427518, "grad_norm": 0.0547363942821251, "learning_rate": 9.29415769719514e-06, "loss": 0.8347, "step": 250480 }, { "epoch": 4.397724679155182, "grad_norm": 0.05061586967606174, "learning_rate": 9.289960097105199e-06, "loss": 0.8309, "step": 250490 }, { "epoch": 4.397900244035183, "grad_norm": 0.05061706467583482, "learning_rate": 9.285763658999059e-06, "loss": 0.8323, "step": 250500 }, { "epoch": 4.398075808915185, "grad_norm": 0.046453744345627726, "learning_rate": 9.281568382929848e-06, "loss": 0.8412, "step": 250510 }, { "epoch": 4.398251373795186, "grad_norm": 0.06982495410436416, "learning_rate": 9.277374268950765e-06, "loss": 0.8408, "step": 250520 }, { "epoch": 4.398426938675187, "grad_norm": 0.053875969663511464, "learning_rate": 9.273181317114898e-06, "loss": 0.8422, "step": 250530 }, { "epoch": 4.398602503555189, "grad_norm": 0.0452138741024222, "learning_rate": 9.268989527475433e-06, "loss": 0.8435, "step": 250540 }, { "epoch": 4.39877806843519, "grad_norm": 0.06655546253937995, "learning_rate": 9.264798900085436e-06, "loss": 0.8383, "step": 250550 }, { "epoch": 4.398953633315192, "grad_norm": 0.04115028634110336, "learning_rate": 9.260609434998016e-06, "loss": 0.842, "step": 250560 }, { "epoch": 4.399129198195193, "grad_norm": 0.05164531486345813, "learning_rate": 9.25642113226627e-06, "loss": 0.8418, "step": 250570 }, { "epoch": 4.399304763075194, "grad_norm": 0.08134644177554483, "learning_rate": 9.252233991943235e-06, "loss": 0.8362, "step": 250580 }, { "epoch": 4.399480327955196, "grad_norm": 0.043615399643811524, "learning_rate": 9.248048014081984e-06, "loss": 0.8429, "step": 250590 }, { "epoch": 4.399655892835197, "grad_norm": 0.04253542569649958, "learning_rate": 9.243863198735558e-06, "loss": 0.8456, "step": 250600 }, { "epoch": 4.399831457715199, "grad_norm": 0.05081785709755138, "learning_rate": 9.239679545956965e-06, "loss": 0.8466, "step": 250610 }, { "epoch": 4.4000070225952, "grad_norm": 0.047447318350175266, "learning_rate": 9.235497055799219e-06, "loss": 0.841, "step": 250620 }, { "epoch": 4.400182587475202, "grad_norm": 0.04255660607970881, "learning_rate": 9.23131572831529e-06, "loss": 0.8325, "step": 250630 }, { "epoch": 4.400358152355203, "grad_norm": 0.0693748193590077, "learning_rate": 9.227135563558209e-06, "loss": 0.8333, "step": 250640 }, { "epoch": 4.400533717235204, "grad_norm": 0.05028377627719966, "learning_rate": 9.22295656158089e-06, "loss": 0.8406, "step": 250650 }, { "epoch": 4.400709282115206, "grad_norm": 0.11458139517851433, "learning_rate": 9.218778722436309e-06, "loss": 0.8327, "step": 250660 }, { "epoch": 4.400884846995207, "grad_norm": 0.07552394374709752, "learning_rate": 9.214602046177397e-06, "loss": 0.8338, "step": 250670 }, { "epoch": 4.401060411875209, "grad_norm": 0.05522796081784795, "learning_rate": 9.210426532857062e-06, "loss": 0.8406, "step": 250680 }, { "epoch": 4.40123597675521, "grad_norm": 0.040635968429315605, "learning_rate": 9.206252182528218e-06, "loss": 0.8304, "step": 250690 }, { "epoch": 4.401411541635211, "grad_norm": 0.06256419799613347, "learning_rate": 9.202078995243726e-06, "loss": 0.835, "step": 250700 }, { "epoch": 4.401587106515213, "grad_norm": 0.05401589264608262, "learning_rate": 9.197906971056511e-06, "loss": 0.8379, "step": 250710 }, { "epoch": 4.401762671395214, "grad_norm": 0.052940574630773354, "learning_rate": 9.193736110019405e-06, "loss": 0.843, "step": 250720 }, { "epoch": 4.401938236275216, "grad_norm": 0.049280352618761236, "learning_rate": 9.18956641218524e-06, "loss": 0.8355, "step": 250730 }, { "epoch": 4.402113801155217, "grad_norm": 0.04737286516987363, "learning_rate": 9.1853978776069e-06, "loss": 0.8361, "step": 250740 }, { "epoch": 4.402289366035219, "grad_norm": 0.05228560508200319, "learning_rate": 9.181230506337129e-06, "loss": 0.8474, "step": 250750 }, { "epoch": 4.40246493091522, "grad_norm": 0.0570075127177841, "learning_rate": 9.177064298428786e-06, "loss": 0.8426, "step": 250760 }, { "epoch": 4.402640495795221, "grad_norm": 0.0533948452880883, "learning_rate": 9.172899253934635e-06, "loss": 0.8376, "step": 250770 }, { "epoch": 4.402816060675223, "grad_norm": 0.06281344388471158, "learning_rate": 9.168735372907439e-06, "loss": 0.831, "step": 250780 }, { "epoch": 4.402991625555224, "grad_norm": 0.047499250839111974, "learning_rate": 9.164572655399987e-06, "loss": 0.8305, "step": 250790 }, { "epoch": 4.403167190435226, "grad_norm": 0.05628907634073075, "learning_rate": 9.160411101464955e-06, "loss": 0.8354, "step": 250800 }, { "epoch": 4.403342755315227, "grad_norm": 0.05270616298185229, "learning_rate": 9.156250711155153e-06, "loss": 0.8352, "step": 250810 }, { "epoch": 4.403518320195229, "grad_norm": 0.05155737909221415, "learning_rate": 9.152091484523248e-06, "loss": 0.8466, "step": 250820 }, { "epoch": 4.40369388507523, "grad_norm": 0.04803206858922459, "learning_rate": 9.147933421621938e-06, "loss": 0.8458, "step": 250830 }, { "epoch": 4.403869449955231, "grad_norm": 0.045403594853264354, "learning_rate": 9.143776522503934e-06, "loss": 0.841, "step": 250840 }, { "epoch": 4.404045014835233, "grad_norm": 0.05463179467931895, "learning_rate": 9.139620787221865e-06, "loss": 0.8486, "step": 250850 }, { "epoch": 4.404220579715234, "grad_norm": 0.05190133594890208, "learning_rate": 9.135466215828428e-06, "loss": 0.8407, "step": 250860 }, { "epoch": 4.4043961445952355, "grad_norm": 0.06213622440896758, "learning_rate": 9.131312808376205e-06, "loss": 0.8341, "step": 250870 }, { "epoch": 4.404571709475237, "grad_norm": 0.051869308386611065, "learning_rate": 9.127160564917884e-06, "loss": 0.8404, "step": 250880 }, { "epoch": 4.404747274355238, "grad_norm": 0.054987315083329776, "learning_rate": 9.123009485506026e-06, "loss": 0.8307, "step": 250890 }, { "epoch": 4.4049228392352395, "grad_norm": 0.06728792733659232, "learning_rate": 9.11885957019326e-06, "loss": 0.8461, "step": 250900 }, { "epoch": 4.405098404115241, "grad_norm": 0.05266077721785263, "learning_rate": 9.114710819032157e-06, "loss": 0.8456, "step": 250910 }, { "epoch": 4.4052739689952425, "grad_norm": 0.05246813984553654, "learning_rate": 9.110563232075273e-06, "loss": 0.8384, "step": 250920 }, { "epoch": 4.4054495338752435, "grad_norm": 0.0620600457761443, "learning_rate": 9.106416809375148e-06, "loss": 0.841, "step": 250930 }, { "epoch": 4.4056250987552446, "grad_norm": 0.049865388690591456, "learning_rate": 9.102271550984365e-06, "loss": 0.8312, "step": 250940 }, { "epoch": 4.4058006636352465, "grad_norm": 0.06352329589441828, "learning_rate": 9.098127456955382e-06, "loss": 0.8402, "step": 250950 }, { "epoch": 4.4059762285152475, "grad_norm": 0.048500845964938574, "learning_rate": 9.093984527340762e-06, "loss": 0.8379, "step": 250960 }, { "epoch": 4.406151793395249, "grad_norm": 0.05523597085967654, "learning_rate": 9.089842762192944e-06, "loss": 0.8401, "step": 250970 }, { "epoch": 4.4063273582752505, "grad_norm": 0.060733640701600325, "learning_rate": 9.085702161564452e-06, "loss": 0.8289, "step": 250980 }, { "epoch": 4.406502923155252, "grad_norm": 0.06507300974761854, "learning_rate": 9.081562725507728e-06, "loss": 0.8414, "step": 250990 }, { "epoch": 4.406678488035253, "grad_norm": 0.05573289072215501, "learning_rate": 9.07742445407521e-06, "loss": 0.8353, "step": 251000 }, { "epoch": 4.4068540529152544, "grad_norm": 0.06860637408053208, "learning_rate": 9.07328734731936e-06, "loss": 0.8414, "step": 251010 }, { "epoch": 4.407029617795256, "grad_norm": 0.06414352137800788, "learning_rate": 9.069151405292559e-06, "loss": 0.8347, "step": 251020 }, { "epoch": 4.407205182675257, "grad_norm": 0.060075595866169264, "learning_rate": 9.065016628047249e-06, "loss": 0.8362, "step": 251030 }, { "epoch": 4.407380747555259, "grad_norm": 0.0556313919266062, "learning_rate": 9.06088301563576e-06, "loss": 0.8319, "step": 251040 }, { "epoch": 4.40755631243526, "grad_norm": 0.05303463859324361, "learning_rate": 9.056750568110538e-06, "loss": 0.8356, "step": 251050 }, { "epoch": 4.407731877315262, "grad_norm": 0.04792812865201051, "learning_rate": 9.052619285523894e-06, "loss": 0.8381, "step": 251060 }, { "epoch": 4.407907442195263, "grad_norm": 0.05271272341608069, "learning_rate": 9.048489167928178e-06, "loss": 0.8457, "step": 251070 }, { "epoch": 4.408083007075264, "grad_norm": 0.0600123883195301, "learning_rate": 9.044360215375752e-06, "loss": 0.8384, "step": 251080 }, { "epoch": 4.408258571955266, "grad_norm": 0.05332563534039346, "learning_rate": 9.04023242791888e-06, "loss": 0.8374, "step": 251090 }, { "epoch": 4.408434136835267, "grad_norm": 0.04441127987842643, "learning_rate": 9.036105805609887e-06, "loss": 0.8346, "step": 251100 }, { "epoch": 4.408609701715269, "grad_norm": 0.05377857330478821, "learning_rate": 9.031980348501084e-06, "loss": 0.8377, "step": 251110 }, { "epoch": 4.40878526659527, "grad_norm": 0.04730254703811243, "learning_rate": 9.027856056644693e-06, "loss": 0.8438, "step": 251120 }, { "epoch": 4.408960831475271, "grad_norm": 0.045141515929253986, "learning_rate": 9.023732930093016e-06, "loss": 0.8412, "step": 251130 }, { "epoch": 4.409136396355273, "grad_norm": 0.054100859724552725, "learning_rate": 9.019610968898228e-06, "loss": 0.8356, "step": 251140 }, { "epoch": 4.409311961235274, "grad_norm": 0.04987230710490107, "learning_rate": 9.01549017311263e-06, "loss": 0.8345, "step": 251150 }, { "epoch": 4.409487526115276, "grad_norm": 0.0448410485306441, "learning_rate": 9.01137054278838e-06, "loss": 0.8345, "step": 251160 }, { "epoch": 4.409663090995277, "grad_norm": 0.05833462427810838, "learning_rate": 9.007252077977704e-06, "loss": 0.8455, "step": 251170 }, { "epoch": 4.409838655875279, "grad_norm": 0.05321595583403733, "learning_rate": 9.003134778732794e-06, "loss": 0.8485, "step": 251180 }, { "epoch": 4.41001422075528, "grad_norm": 0.04978655192413645, "learning_rate": 8.999018645105772e-06, "loss": 0.8494, "step": 251190 }, { "epoch": 4.410189785635281, "grad_norm": 0.052656998099387405, "learning_rate": 8.994903677148835e-06, "loss": 0.8431, "step": 251200 }, { "epoch": 4.410365350515283, "grad_norm": 0.050370540041383915, "learning_rate": 8.990789874914064e-06, "loss": 0.835, "step": 251210 }, { "epoch": 4.410540915395284, "grad_norm": 0.051769176676633145, "learning_rate": 8.986677238453658e-06, "loss": 0.8407, "step": 251220 }, { "epoch": 4.410716480275286, "grad_norm": 0.04931536097796973, "learning_rate": 8.98256576781967e-06, "loss": 0.8433, "step": 251230 }, { "epoch": 4.410892045155287, "grad_norm": 0.054548427385838406, "learning_rate": 8.9784554630642e-06, "loss": 0.8364, "step": 251240 }, { "epoch": 4.411067610035288, "grad_norm": 0.03875270932224459, "learning_rate": 8.974346324239366e-06, "loss": 0.8384, "step": 251250 }, { "epoch": 4.41124317491529, "grad_norm": 0.051216818684988986, "learning_rate": 8.970238351397173e-06, "loss": 0.829, "step": 251260 }, { "epoch": 4.411418739795291, "grad_norm": 0.06593571959538541, "learning_rate": 8.966131544589698e-06, "loss": 0.8292, "step": 251270 }, { "epoch": 4.411594304675293, "grad_norm": 0.04498355306963626, "learning_rate": 8.962025903868995e-06, "loss": 0.846, "step": 251280 }, { "epoch": 4.411769869555294, "grad_norm": 0.04902382023259568, "learning_rate": 8.957921429287045e-06, "loss": 0.8403, "step": 251290 }, { "epoch": 4.411945434435296, "grad_norm": 0.05746897745920257, "learning_rate": 8.953818120895887e-06, "loss": 0.8292, "step": 251300 }, { "epoch": 4.412120999315297, "grad_norm": 0.04934598784254338, "learning_rate": 8.949715978747458e-06, "loss": 0.8339, "step": 251310 }, { "epoch": 4.412296564195298, "grad_norm": 0.0517498659136083, "learning_rate": 8.945615002893812e-06, "loss": 0.835, "step": 251320 }, { "epoch": 4.4124721290753, "grad_norm": 0.05961630280019871, "learning_rate": 8.941515193386834e-06, "loss": 0.8443, "step": 251330 }, { "epoch": 4.412647693955301, "grad_norm": 0.04227121323368595, "learning_rate": 8.937416550278519e-06, "loss": 0.8385, "step": 251340 }, { "epoch": 4.412823258835303, "grad_norm": 0.05287794267871087, "learning_rate": 8.933319073620783e-06, "loss": 0.8359, "step": 251350 }, { "epoch": 4.412998823715304, "grad_norm": 0.048533852312024964, "learning_rate": 8.92922276346552e-06, "loss": 0.839, "step": 251360 }, { "epoch": 4.413174388595305, "grad_norm": 0.0552187263178662, "learning_rate": 8.925127619864673e-06, "loss": 0.8271, "step": 251370 }, { "epoch": 4.413349953475307, "grad_norm": 0.044268045270449696, "learning_rate": 8.921033642870075e-06, "loss": 0.8446, "step": 251380 }, { "epoch": 4.413525518355308, "grad_norm": 0.07310894368391559, "learning_rate": 8.916940832533651e-06, "loss": 0.8375, "step": 251390 }, { "epoch": 4.41370108323531, "grad_norm": 0.04560157202071827, "learning_rate": 8.912849188907227e-06, "loss": 0.8375, "step": 251400 }, { "epoch": 4.413876648115311, "grad_norm": 0.04414807882275178, "learning_rate": 8.908758712042653e-06, "loss": 0.842, "step": 251410 }, { "epoch": 4.414052212995313, "grad_norm": 0.04696289133411016, "learning_rate": 8.904669401991767e-06, "loss": 0.8346, "step": 251420 }, { "epoch": 4.414227777875314, "grad_norm": 0.05235938758337804, "learning_rate": 8.900581258806359e-06, "loss": 0.8347, "step": 251430 }, { "epoch": 4.414403342755315, "grad_norm": 0.05606102366884861, "learning_rate": 8.896494282538242e-06, "loss": 0.8369, "step": 251440 }, { "epoch": 4.414578907635317, "grad_norm": 0.04597399400516327, "learning_rate": 8.892408473239183e-06, "loss": 0.8395, "step": 251450 }, { "epoch": 4.414754472515318, "grad_norm": 0.06160858525140655, "learning_rate": 8.888323830960965e-06, "loss": 0.8398, "step": 251460 }, { "epoch": 4.41493003739532, "grad_norm": 0.04411418834233668, "learning_rate": 8.884240355755356e-06, "loss": 0.8332, "step": 251470 }, { "epoch": 4.415105602275321, "grad_norm": 0.05888073664369715, "learning_rate": 8.880158047674032e-06, "loss": 0.8376, "step": 251480 }, { "epoch": 4.415281167155323, "grad_norm": 0.04864807574005934, "learning_rate": 8.8760769067688e-06, "loss": 0.8357, "step": 251490 }, { "epoch": 4.415456732035324, "grad_norm": 0.05317471315552484, "learning_rate": 8.871996933091315e-06, "loss": 0.8384, "step": 251500 }, { "epoch": 4.415632296915325, "grad_norm": 0.051397538733654255, "learning_rate": 8.86791812669328e-06, "loss": 0.837, "step": 251510 }, { "epoch": 4.415807861795327, "grad_norm": 0.054892923631296234, "learning_rate": 8.863840487626401e-06, "loss": 0.8351, "step": 251520 }, { "epoch": 4.415983426675328, "grad_norm": 0.08783052872432914, "learning_rate": 8.859764015942301e-06, "loss": 0.8376, "step": 251530 }, { "epoch": 4.41615899155533, "grad_norm": 0.044715855441787515, "learning_rate": 8.855688711692662e-06, "loss": 0.8351, "step": 251540 }, { "epoch": 4.416334556435331, "grad_norm": 0.046102603167139315, "learning_rate": 8.851614574929073e-06, "loss": 0.843, "step": 251550 }, { "epoch": 4.416510121315332, "grad_norm": 0.04563031417203102, "learning_rate": 8.84754160570323e-06, "loss": 0.8452, "step": 251560 }, { "epoch": 4.416685686195334, "grad_norm": 0.06962421442073607, "learning_rate": 8.84346980406667e-06, "loss": 0.8426, "step": 251570 }, { "epoch": 4.416861251075335, "grad_norm": 0.05444024408337803, "learning_rate": 8.839399170071017e-06, "loss": 0.8405, "step": 251580 }, { "epoch": 4.417036815955337, "grad_norm": 0.05944259066278874, "learning_rate": 8.835329703767852e-06, "loss": 0.8359, "step": 251590 }, { "epoch": 4.417212380835338, "grad_norm": 0.06292754162256531, "learning_rate": 8.831261405208706e-06, "loss": 0.8379, "step": 251600 }, { "epoch": 4.417387945715339, "grad_norm": 0.055236424624510706, "learning_rate": 8.827194274445157e-06, "loss": 0.8269, "step": 251610 }, { "epoch": 4.417563510595341, "grad_norm": 0.04324221345194529, "learning_rate": 8.823128311528703e-06, "loss": 0.8397, "step": 251620 }, { "epoch": 4.417739075475342, "grad_norm": 0.05139592973938702, "learning_rate": 8.819063516510886e-06, "loss": 0.8393, "step": 251630 }, { "epoch": 4.4179146403553435, "grad_norm": 0.051553792232305565, "learning_rate": 8.814999889443218e-06, "loss": 0.8414, "step": 251640 }, { "epoch": 4.418090205235345, "grad_norm": 0.050881031404733254, "learning_rate": 8.81093743037713e-06, "loss": 0.8381, "step": 251650 }, { "epoch": 4.4182657701153465, "grad_norm": 0.05680941677627339, "learning_rate": 8.80687613936417e-06, "loss": 0.8408, "step": 251660 }, { "epoch": 4.4184413349953475, "grad_norm": 0.05530049609087751, "learning_rate": 8.802816016455735e-06, "loss": 0.8349, "step": 251670 }, { "epoch": 4.418616899875349, "grad_norm": 0.05517278942549323, "learning_rate": 8.7987570617033e-06, "loss": 0.8358, "step": 251680 }, { "epoch": 4.4187924647553505, "grad_norm": 0.05797983664105514, "learning_rate": 8.794699275158292e-06, "loss": 0.8372, "step": 251690 }, { "epoch": 4.4189680296353515, "grad_norm": 0.04401031852165657, "learning_rate": 8.790642656872094e-06, "loss": 0.834, "step": 251700 }, { "epoch": 4.419143594515353, "grad_norm": 0.055224980443944374, "learning_rate": 8.786587206896152e-06, "loss": 0.8413, "step": 251710 }, { "epoch": 4.4193191593953545, "grad_norm": 0.044958952063427246, "learning_rate": 8.782532925281787e-06, "loss": 0.8404, "step": 251720 }, { "epoch": 4.419494724275356, "grad_norm": 0.045163662470338525, "learning_rate": 8.778479812080438e-06, "loss": 0.8332, "step": 251730 }, { "epoch": 4.419670289155357, "grad_norm": 0.05190870029321653, "learning_rate": 8.774427867343404e-06, "loss": 0.8407, "step": 251740 }, { "epoch": 4.4198458540353585, "grad_norm": 0.04942702603536956, "learning_rate": 8.770377091122036e-06, "loss": 0.8448, "step": 251750 }, { "epoch": 4.42002141891536, "grad_norm": 0.04709180780120835, "learning_rate": 8.766327483467695e-06, "loss": 0.8331, "step": 251760 }, { "epoch": 4.420196983795361, "grad_norm": 0.0677516297263376, "learning_rate": 8.762279044431645e-06, "loss": 0.8328, "step": 251770 }, { "epoch": 4.420372548675363, "grad_norm": 0.04981151470405238, "learning_rate": 8.758231774065204e-06, "loss": 0.8346, "step": 251780 }, { "epoch": 4.420548113555364, "grad_norm": 0.05826909785983269, "learning_rate": 8.754185672419632e-06, "loss": 0.8398, "step": 251790 }, { "epoch": 4.420723678435365, "grad_norm": 0.06393251374575598, "learning_rate": 8.750140739546217e-06, "loss": 0.836, "step": 251800 }, { "epoch": 4.420899243315367, "grad_norm": 0.058944257329536495, "learning_rate": 8.746096975496198e-06, "loss": 0.8328, "step": 251810 }, { "epoch": 4.421074808195368, "grad_norm": 0.04601100366134911, "learning_rate": 8.742054380320785e-06, "loss": 0.8409, "step": 251820 }, { "epoch": 4.42125037307537, "grad_norm": 0.04192501850107229, "learning_rate": 8.738012954071262e-06, "loss": 0.8385, "step": 251830 }, { "epoch": 4.421425937955371, "grad_norm": 0.05624022573971229, "learning_rate": 8.733972696798772e-06, "loss": 0.8447, "step": 251840 }, { "epoch": 4.421601502835372, "grad_norm": 0.0475725121826854, "learning_rate": 8.729933608554544e-06, "loss": 0.8332, "step": 251850 }, { "epoch": 4.421777067715374, "grad_norm": 0.05008280076692091, "learning_rate": 8.72589568938974e-06, "loss": 0.8405, "step": 251860 }, { "epoch": 4.421952632595375, "grad_norm": 0.08583636041742021, "learning_rate": 8.721858939355517e-06, "loss": 0.8413, "step": 251870 }, { "epoch": 4.422128197475377, "grad_norm": 0.04865840772970111, "learning_rate": 8.717823358503038e-06, "loss": 0.8334, "step": 251880 }, { "epoch": 4.422303762355378, "grad_norm": 0.06952862057464201, "learning_rate": 8.713788946883388e-06, "loss": 0.8386, "step": 251890 }, { "epoch": 4.42247932723538, "grad_norm": 0.05331566804049456, "learning_rate": 8.709755704547756e-06, "loss": 0.8463, "step": 251900 }, { "epoch": 4.422654892115381, "grad_norm": 0.052312412136635685, "learning_rate": 8.705723631547186e-06, "loss": 0.8352, "step": 251910 }, { "epoch": 4.422830456995382, "grad_norm": 0.05521291363013707, "learning_rate": 8.701692727932791e-06, "loss": 0.833, "step": 251920 }, { "epoch": 4.423006021875384, "grad_norm": 0.04497438881243429, "learning_rate": 8.697662993755656e-06, "loss": 0.8387, "step": 251930 }, { "epoch": 4.423181586755385, "grad_norm": 0.04518610913393563, "learning_rate": 8.693634429066797e-06, "loss": 0.8316, "step": 251940 }, { "epoch": 4.423357151635387, "grad_norm": 0.050322263249580064, "learning_rate": 8.689607033917301e-06, "loss": 0.8401, "step": 251950 }, { "epoch": 4.423532716515388, "grad_norm": 0.06030631477987046, "learning_rate": 8.685580808358145e-06, "loss": 0.8312, "step": 251960 }, { "epoch": 4.42370828139539, "grad_norm": 0.059231921653843334, "learning_rate": 8.68155575244039e-06, "loss": 0.8434, "step": 251970 }, { "epoch": 4.423883846275391, "grad_norm": 0.054596649662896235, "learning_rate": 8.677531866215027e-06, "loss": 0.8327, "step": 251980 }, { "epoch": 4.424059411155392, "grad_norm": 0.06512656208369046, "learning_rate": 8.673509149733003e-06, "loss": 0.8367, "step": 251990 }, { "epoch": 4.424234976035394, "grad_norm": 0.053701944443584046, "learning_rate": 8.669487603045329e-06, "loss": 0.8389, "step": 252000 }, { "epoch": 4.424410540915395, "grad_norm": 0.0511458459626452, "learning_rate": 8.665467226202937e-06, "loss": 0.8393, "step": 252010 }, { "epoch": 4.424586105795397, "grad_norm": 0.059184262206251136, "learning_rate": 8.661448019256772e-06, "loss": 0.8383, "step": 252020 }, { "epoch": 4.424761670675398, "grad_norm": 0.06487204329370354, "learning_rate": 8.657429982257767e-06, "loss": 0.836, "step": 252030 }, { "epoch": 4.424937235555399, "grad_norm": 0.0597499763188485, "learning_rate": 8.653413115256811e-06, "loss": 0.839, "step": 252040 }, { "epoch": 4.425112800435401, "grad_norm": 0.06848915176703438, "learning_rate": 8.649397418304816e-06, "loss": 0.8399, "step": 252050 }, { "epoch": 4.425288365315402, "grad_norm": 0.046773243112396876, "learning_rate": 8.64538289145263e-06, "loss": 0.8411, "step": 252060 }, { "epoch": 4.425463930195404, "grad_norm": 0.06774700383312032, "learning_rate": 8.641369534751182e-06, "loss": 0.8299, "step": 252070 }, { "epoch": 4.425639495075405, "grad_norm": 0.04456170265039534, "learning_rate": 8.637357348251249e-06, "loss": 0.8413, "step": 252080 }, { "epoch": 4.425815059955407, "grad_norm": 0.043731182127319454, "learning_rate": 8.633346332003717e-06, "loss": 0.8344, "step": 252090 }, { "epoch": 4.425990624835408, "grad_norm": 0.04881236622933115, "learning_rate": 8.629336486059392e-06, "loss": 0.8385, "step": 252100 }, { "epoch": 4.426166189715409, "grad_norm": 0.12687663890704864, "learning_rate": 8.62532781046907e-06, "loss": 0.8359, "step": 252110 }, { "epoch": 4.426341754595411, "grad_norm": 0.0539627998023148, "learning_rate": 8.621320305283556e-06, "loss": 0.8499, "step": 252120 }, { "epoch": 4.426517319475412, "grad_norm": 0.09576170241984366, "learning_rate": 8.617313970553604e-06, "loss": 0.834, "step": 252130 }, { "epoch": 4.426692884355414, "grad_norm": 0.062163497085160196, "learning_rate": 8.613308806329998e-06, "loss": 0.8383, "step": 252140 }, { "epoch": 4.426868449235415, "grad_norm": 0.05396401705053233, "learning_rate": 8.609304812663477e-06, "loss": 0.8371, "step": 252150 }, { "epoch": 4.427044014115417, "grad_norm": 0.06247378073206681, "learning_rate": 8.605301989604742e-06, "loss": 0.8378, "step": 252160 }, { "epoch": 4.427219578995418, "grad_norm": 0.047742519129201924, "learning_rate": 8.601300337204574e-06, "loss": 0.8415, "step": 252170 }, { "epoch": 4.427395143875419, "grad_norm": 0.06004412002888419, "learning_rate": 8.597299855513621e-06, "loss": 0.8397, "step": 252180 }, { "epoch": 4.427570708755421, "grad_norm": 0.05255523218735327, "learning_rate": 8.59330054458259e-06, "loss": 0.8406, "step": 252190 }, { "epoch": 4.427746273635422, "grad_norm": 0.05233183734842703, "learning_rate": 8.589302404462174e-06, "loss": 0.8413, "step": 252200 }, { "epoch": 4.427921838515424, "grad_norm": 0.05975808951973075, "learning_rate": 8.585305435202976e-06, "loss": 0.8421, "step": 252210 }, { "epoch": 4.428097403395425, "grad_norm": 0.06477441588830757, "learning_rate": 8.581309636855692e-06, "loss": 0.8297, "step": 252220 }, { "epoch": 4.428272968275426, "grad_norm": 0.04873000160133851, "learning_rate": 8.577315009470902e-06, "loss": 0.8377, "step": 252230 }, { "epoch": 4.428448533155428, "grad_norm": 0.052631652055762895, "learning_rate": 8.573321553099264e-06, "loss": 0.8377, "step": 252240 }, { "epoch": 4.428624098035429, "grad_norm": 0.04705929305807769, "learning_rate": 8.569329267791342e-06, "loss": 0.8431, "step": 252250 }, { "epoch": 4.428799662915431, "grad_norm": 0.049140232847169785, "learning_rate": 8.565338153597736e-06, "loss": 0.8301, "step": 252260 }, { "epoch": 4.428975227795432, "grad_norm": 0.04476562374844736, "learning_rate": 8.561348210569026e-06, "loss": 0.8348, "step": 252270 }, { "epoch": 4.429150792675433, "grad_norm": 0.05126280704887249, "learning_rate": 8.557359438755733e-06, "loss": 0.8332, "step": 252280 }, { "epoch": 4.429326357555435, "grad_norm": 0.058864152385961724, "learning_rate": 8.553371838208422e-06, "loss": 0.8399, "step": 252290 }, { "epoch": 4.429501922435436, "grad_norm": 0.05082660345233886, "learning_rate": 8.5493854089776e-06, "loss": 0.8386, "step": 252300 }, { "epoch": 4.429677487315438, "grad_norm": 0.0569126718822101, "learning_rate": 8.545400151113783e-06, "loss": 0.8343, "step": 252310 }, { "epoch": 4.429853052195439, "grad_norm": 0.0484925268089862, "learning_rate": 8.541416064667474e-06, "loss": 0.8375, "step": 252320 }, { "epoch": 4.430028617075441, "grad_norm": 0.05972126122178655, "learning_rate": 8.537433149689125e-06, "loss": 0.84, "step": 252330 }, { "epoch": 4.430204181955442, "grad_norm": 0.05213718705264309, "learning_rate": 8.533451406229237e-06, "loss": 0.83, "step": 252340 }, { "epoch": 4.430379746835443, "grad_norm": 0.0777019268910168, "learning_rate": 8.529470834338229e-06, "loss": 0.8426, "step": 252350 }, { "epoch": 4.430555311715445, "grad_norm": 0.05115141541965036, "learning_rate": 8.525491434066544e-06, "loss": 0.8367, "step": 252360 }, { "epoch": 4.430730876595446, "grad_norm": 0.04164235234598252, "learning_rate": 8.521513205464623e-06, "loss": 0.8447, "step": 252370 }, { "epoch": 4.4309064414754475, "grad_norm": 0.052694905724934724, "learning_rate": 8.517536148582845e-06, "loss": 0.8326, "step": 252380 }, { "epoch": 4.431082006355449, "grad_norm": 0.06258813541203853, "learning_rate": 8.513560263471612e-06, "loss": 0.8326, "step": 252390 }, { "epoch": 4.4312575712354505, "grad_norm": 0.05960451788082379, "learning_rate": 8.509585550181265e-06, "loss": 0.8407, "step": 252400 }, { "epoch": 4.4314331361154515, "grad_norm": 0.052791810737246474, "learning_rate": 8.505612008762234e-06, "loss": 0.8322, "step": 252410 }, { "epoch": 4.431608700995453, "grad_norm": 0.0430270024490982, "learning_rate": 8.50163963926481e-06, "loss": 0.8389, "step": 252420 }, { "epoch": 4.4317842658754545, "grad_norm": 0.047253606594707974, "learning_rate": 8.497668441739326e-06, "loss": 0.8346, "step": 252430 }, { "epoch": 4.4319598307554555, "grad_norm": 0.048608924693918254, "learning_rate": 8.493698416236143e-06, "loss": 0.8409, "step": 252440 }, { "epoch": 4.432135395635457, "grad_norm": 0.05598225466948425, "learning_rate": 8.489729562805501e-06, "loss": 0.8313, "step": 252450 }, { "epoch": 4.4323109605154585, "grad_norm": 0.050493214374802205, "learning_rate": 8.48576188149774e-06, "loss": 0.8406, "step": 252460 }, { "epoch": 4.4324865253954595, "grad_norm": 0.04887000404814448, "learning_rate": 8.481795372363089e-06, "loss": 0.8357, "step": 252470 }, { "epoch": 4.432662090275461, "grad_norm": 0.04494159069759292, "learning_rate": 8.47783003545182e-06, "loss": 0.8438, "step": 252480 }, { "epoch": 4.4328376551554625, "grad_norm": 0.050018982083394246, "learning_rate": 8.473865870814191e-06, "loss": 0.8393, "step": 252490 }, { "epoch": 4.433013220035464, "grad_norm": 0.04943171663339999, "learning_rate": 8.469902878500383e-06, "loss": 0.841, "step": 252500 }, { "epoch": 4.433188784915465, "grad_norm": 0.0397991778076104, "learning_rate": 8.46594105856067e-06, "loss": 0.8415, "step": 252510 }, { "epoch": 4.4333643497954665, "grad_norm": 0.05760699132088902, "learning_rate": 8.461980411045203e-06, "loss": 0.8421, "step": 252520 }, { "epoch": 4.433539914675468, "grad_norm": 0.04173019095152896, "learning_rate": 8.458020936004174e-06, "loss": 0.8461, "step": 252530 }, { "epoch": 4.433715479555469, "grad_norm": 0.036691106472172476, "learning_rate": 8.454062633487778e-06, "loss": 0.8359, "step": 252540 }, { "epoch": 4.433891044435471, "grad_norm": 0.05659934985376422, "learning_rate": 8.45010550354613e-06, "loss": 0.8393, "step": 252550 }, { "epoch": 4.434066609315472, "grad_norm": 0.04906409790239427, "learning_rate": 8.446149546229387e-06, "loss": 0.8389, "step": 252560 }, { "epoch": 4.434242174195474, "grad_norm": 0.05264551938433608, "learning_rate": 8.442194761587645e-06, "loss": 0.8372, "step": 252570 }, { "epoch": 4.434417739075475, "grad_norm": 0.05048190062831621, "learning_rate": 8.438241149671073e-06, "loss": 0.843, "step": 252580 }, { "epoch": 4.434593303955476, "grad_norm": 0.05325235102453735, "learning_rate": 8.434288710529695e-06, "loss": 0.8375, "step": 252590 }, { "epoch": 4.434768868835478, "grad_norm": 0.06651956533772455, "learning_rate": 8.430337444213623e-06, "loss": 0.8366, "step": 252600 }, { "epoch": 4.434944433715479, "grad_norm": 0.05015455683368762, "learning_rate": 8.426387350772937e-06, "loss": 0.8422, "step": 252610 }, { "epoch": 4.435119998595481, "grad_norm": 0.040168010768565084, "learning_rate": 8.422438430257643e-06, "loss": 0.8376, "step": 252620 }, { "epoch": 4.435295563475482, "grad_norm": 0.05265180023654487, "learning_rate": 8.418490682717817e-06, "loss": 0.8424, "step": 252630 }, { "epoch": 4.435471128355484, "grad_norm": 0.044897074663661864, "learning_rate": 8.414544108203437e-06, "loss": 0.8396, "step": 252640 }, { "epoch": 4.435646693235485, "grad_norm": 0.0587263931524389, "learning_rate": 8.410598706764525e-06, "loss": 0.8397, "step": 252650 }, { "epoch": 4.435822258115486, "grad_norm": 0.04579971368935492, "learning_rate": 8.406654478451102e-06, "loss": 0.84, "step": 252660 }, { "epoch": 4.435997822995488, "grad_norm": 0.04835866559293575, "learning_rate": 8.402711423313062e-06, "loss": 0.8286, "step": 252670 }, { "epoch": 4.436173387875489, "grad_norm": 0.04829381120191266, "learning_rate": 8.398769541400456e-06, "loss": 0.8396, "step": 252680 }, { "epoch": 4.436348952755491, "grad_norm": 0.062017361389420884, "learning_rate": 8.394828832763169e-06, "loss": 0.8373, "step": 252690 }, { "epoch": 4.436524517635492, "grad_norm": 0.0530129165291228, "learning_rate": 8.390889297451157e-06, "loss": 0.8407, "step": 252700 }, { "epoch": 4.436700082515493, "grad_norm": 0.043426773851002874, "learning_rate": 8.386950935514332e-06, "loss": 0.8452, "step": 252710 }, { "epoch": 4.436875647395495, "grad_norm": 0.05361314660364481, "learning_rate": 8.383013747002569e-06, "loss": 0.8485, "step": 252720 }, { "epoch": 4.437051212275496, "grad_norm": 0.049950750221300404, "learning_rate": 8.379077731965796e-06, "loss": 0.8386, "step": 252730 }, { "epoch": 4.437226777155498, "grad_norm": 0.04755479596816508, "learning_rate": 8.37514289045383e-06, "loss": 0.8386, "step": 252740 }, { "epoch": 4.437402342035499, "grad_norm": 0.0511913280570267, "learning_rate": 8.371209222516572e-06, "loss": 0.8428, "step": 252750 }, { "epoch": 4.437577906915501, "grad_norm": 0.045271877110248285, "learning_rate": 8.367276728203848e-06, "loss": 0.8423, "step": 252760 }, { "epoch": 4.437753471795502, "grad_norm": 0.055855419191929, "learning_rate": 8.363345407565467e-06, "loss": 0.8326, "step": 252770 }, { "epoch": 4.437929036675503, "grad_norm": 0.057117335921622714, "learning_rate": 8.359415260651268e-06, "loss": 0.8305, "step": 252780 }, { "epoch": 4.438104601555505, "grad_norm": 0.06915939177231492, "learning_rate": 8.355486287511024e-06, "loss": 0.8435, "step": 252790 }, { "epoch": 4.438280166435506, "grad_norm": 0.05858125913631465, "learning_rate": 8.35155848819454e-06, "loss": 0.8374, "step": 252800 }, { "epoch": 4.438455731315508, "grad_norm": 0.05820110215708391, "learning_rate": 8.347631862751542e-06, "loss": 0.833, "step": 252810 }, { "epoch": 4.438631296195509, "grad_norm": 0.05018260336805941, "learning_rate": 8.343706411231817e-06, "loss": 0.8333, "step": 252820 }, { "epoch": 4.438806861075511, "grad_norm": 0.04940950759425424, "learning_rate": 8.339782133685098e-06, "loss": 0.8369, "step": 252830 }, { "epoch": 4.438982425955512, "grad_norm": 0.048678496439097, "learning_rate": 8.33585903016107e-06, "loss": 0.8416, "step": 252840 }, { "epoch": 4.439157990835513, "grad_norm": 0.04791758201478931, "learning_rate": 8.331937100709488e-06, "loss": 0.8328, "step": 252850 }, { "epoch": 4.439333555715515, "grad_norm": 0.04614173672598014, "learning_rate": 8.328016345380021e-06, "loss": 0.8468, "step": 252860 }, { "epoch": 4.439509120595516, "grad_norm": 0.04897909292366216, "learning_rate": 8.324096764222339e-06, "loss": 0.8362, "step": 252870 }, { "epoch": 4.439684685475518, "grad_norm": 0.0453455886252759, "learning_rate": 8.320178357286133e-06, "loss": 0.8397, "step": 252880 }, { "epoch": 4.439860250355519, "grad_norm": 0.03846464490985724, "learning_rate": 8.316261124621005e-06, "loss": 0.84, "step": 252890 }, { "epoch": 4.44003581523552, "grad_norm": 0.04488780462271407, "learning_rate": 8.312345066276628e-06, "loss": 0.8308, "step": 252900 }, { "epoch": 4.440211380115522, "grad_norm": 0.07241250336317415, "learning_rate": 8.308430182302582e-06, "loss": 0.8331, "step": 252910 }, { "epoch": 4.440386944995523, "grad_norm": 0.0465909713946117, "learning_rate": 8.30451647274851e-06, "loss": 0.8441, "step": 252920 }, { "epoch": 4.440562509875525, "grad_norm": 0.051052336951995986, "learning_rate": 8.300603937663975e-06, "loss": 0.8367, "step": 252930 }, { "epoch": 4.440738074755526, "grad_norm": 0.054674620939189685, "learning_rate": 8.296692577098549e-06, "loss": 0.8357, "step": 252940 }, { "epoch": 4.440913639635527, "grad_norm": 0.05392716597715143, "learning_rate": 8.2927823911018e-06, "loss": 0.8402, "step": 252950 }, { "epoch": 4.441089204515529, "grad_norm": 0.04513381752561494, "learning_rate": 8.288873379723263e-06, "loss": 0.8487, "step": 252960 }, { "epoch": 4.44126476939553, "grad_norm": 0.1179500438852326, "learning_rate": 8.284965543012482e-06, "loss": 0.8452, "step": 252970 }, { "epoch": 4.441440334275532, "grad_norm": 0.04547282660596662, "learning_rate": 8.281058881018932e-06, "loss": 0.8431, "step": 252980 }, { "epoch": 4.441615899155533, "grad_norm": 0.049829303656315424, "learning_rate": 8.277153393792147e-06, "loss": 0.8359, "step": 252990 }, { "epoch": 4.441791464035535, "grad_norm": 0.053116792847342525, "learning_rate": 8.2732490813816e-06, "loss": 0.8425, "step": 253000 }, { "epoch": 4.441967028915536, "grad_norm": 0.05445839316961897, "learning_rate": 8.26934594383674e-06, "loss": 0.8402, "step": 253010 }, { "epoch": 4.442142593795537, "grad_norm": 0.054964048092426616, "learning_rate": 8.265443981207064e-06, "loss": 0.8411, "step": 253020 }, { "epoch": 4.442318158675539, "grad_norm": 0.05458702421229574, "learning_rate": 8.261543193541968e-06, "loss": 0.8304, "step": 253030 }, { "epoch": 4.44249372355554, "grad_norm": 0.06390618096835546, "learning_rate": 8.257643580890904e-06, "loss": 0.8278, "step": 253040 }, { "epoch": 4.442669288435542, "grad_norm": 0.055144928957749376, "learning_rate": 8.25374514330327e-06, "loss": 0.85, "step": 253050 }, { "epoch": 4.442844853315543, "grad_norm": 0.10617738964115878, "learning_rate": 8.24984788082845e-06, "loss": 0.8328, "step": 253060 }, { "epoch": 4.443020418195545, "grad_norm": 0.05266719538229584, "learning_rate": 8.245951793515852e-06, "loss": 0.8391, "step": 253070 }, { "epoch": 4.443195983075546, "grad_norm": 0.05181205066873956, "learning_rate": 8.242056881414787e-06, "loss": 0.8383, "step": 253080 }, { "epoch": 4.443371547955547, "grad_norm": 0.05116172601419715, "learning_rate": 8.238163144574679e-06, "loss": 0.8436, "step": 253090 }, { "epoch": 4.443547112835549, "grad_norm": 0.046016586555812894, "learning_rate": 8.234270583044792e-06, "loss": 0.8395, "step": 253100 }, { "epoch": 4.44372267771555, "grad_norm": 0.04643887209908536, "learning_rate": 8.230379196874476e-06, "loss": 0.8362, "step": 253110 }, { "epoch": 4.4438982425955516, "grad_norm": 0.04941678050234471, "learning_rate": 8.226488986113062e-06, "loss": 0.8316, "step": 253120 }, { "epoch": 4.444073807475553, "grad_norm": 0.05820255664293819, "learning_rate": 8.222599950809786e-06, "loss": 0.8372, "step": 253130 }, { "epoch": 4.444249372355554, "grad_norm": 0.05412807515830896, "learning_rate": 8.218712091013972e-06, "loss": 0.8405, "step": 253140 }, { "epoch": 4.4444249372355555, "grad_norm": 0.045457937416361324, "learning_rate": 8.214825406774833e-06, "loss": 0.8424, "step": 253150 }, { "epoch": 4.444600502115557, "grad_norm": 0.04923724711260702, "learning_rate": 8.210939898141646e-06, "loss": 0.8369, "step": 253160 }, { "epoch": 4.4447760669955585, "grad_norm": 0.04857174026790331, "learning_rate": 8.207055565163649e-06, "loss": 0.8341, "step": 253170 }, { "epoch": 4.4449516318755595, "grad_norm": 0.059499086457694444, "learning_rate": 8.203172407890006e-06, "loss": 0.8311, "step": 253180 }, { "epoch": 4.445127196755561, "grad_norm": 0.14822179587027492, "learning_rate": 8.199290426369993e-06, "loss": 0.8397, "step": 253190 }, { "epoch": 4.4453027616355625, "grad_norm": 0.055472280291511224, "learning_rate": 8.195409620652727e-06, "loss": 0.8377, "step": 253200 }, { "epoch": 4.4454783265155635, "grad_norm": 0.04791118523063821, "learning_rate": 8.191529990787419e-06, "loss": 0.831, "step": 253210 }, { "epoch": 4.445653891395565, "grad_norm": 0.06110373366320234, "learning_rate": 8.187651536823216e-06, "loss": 0.8431, "step": 253220 }, { "epoch": 4.4458294562755665, "grad_norm": 0.055368615493865174, "learning_rate": 8.183774258809243e-06, "loss": 0.8396, "step": 253230 }, { "epoch": 4.446005021155568, "grad_norm": 0.0476741737366664, "learning_rate": 8.179898156794662e-06, "loss": 0.8416, "step": 253240 }, { "epoch": 4.446180586035569, "grad_norm": 0.059775098788216144, "learning_rate": 8.176023230828529e-06, "loss": 0.8309, "step": 253250 }, { "epoch": 4.4463561509155705, "grad_norm": 0.06331207516245135, "learning_rate": 8.17214948095999e-06, "loss": 0.8412, "step": 253260 }, { "epoch": 4.446531715795572, "grad_norm": 0.055230781479085236, "learning_rate": 8.168276907238105e-06, "loss": 0.8424, "step": 253270 }, { "epoch": 4.446707280675573, "grad_norm": 0.059389775356306976, "learning_rate": 8.164405509711937e-06, "loss": 0.8385, "step": 253280 }, { "epoch": 4.446882845555575, "grad_norm": 0.045297220468078235, "learning_rate": 8.160535288430563e-06, "loss": 0.8431, "step": 253290 }, { "epoch": 4.447058410435576, "grad_norm": 0.05332083173527712, "learning_rate": 8.156666243442978e-06, "loss": 0.8368, "step": 253300 }, { "epoch": 4.447233975315578, "grad_norm": 0.0482515273672711, "learning_rate": 8.152798374798239e-06, "loss": 0.8392, "step": 253310 }, { "epoch": 4.447409540195579, "grad_norm": 0.042731923727394754, "learning_rate": 8.148931682545333e-06, "loss": 0.844, "step": 253320 }, { "epoch": 4.44758510507558, "grad_norm": 0.052132700994759945, "learning_rate": 8.145066166733248e-06, "loss": 0.8399, "step": 253330 }, { "epoch": 4.447760669955582, "grad_norm": 0.06141558138925998, "learning_rate": 8.141201827410992e-06, "loss": 0.8315, "step": 253340 }, { "epoch": 4.447936234835583, "grad_norm": 0.05157409400234576, "learning_rate": 8.137338664627466e-06, "loss": 0.8414, "step": 253350 }, { "epoch": 4.448111799715585, "grad_norm": 0.06101725485466882, "learning_rate": 8.133476678431695e-06, "loss": 0.8338, "step": 253360 }, { "epoch": 4.448287364595586, "grad_norm": 0.04827084949321106, "learning_rate": 8.12961586887255e-06, "loss": 0.8398, "step": 253370 }, { "epoch": 4.448462929475587, "grad_norm": 0.05589793441619098, "learning_rate": 8.12575623599898e-06, "loss": 0.8343, "step": 253380 }, { "epoch": 4.448638494355589, "grad_norm": 0.06459981341385668, "learning_rate": 8.121897779859881e-06, "loss": 0.8372, "step": 253390 }, { "epoch": 4.44881405923559, "grad_norm": 0.047644218711099556, "learning_rate": 8.118040500504134e-06, "loss": 0.8342, "step": 253400 }, { "epoch": 4.448989624115592, "grad_norm": 0.04988771952197799, "learning_rate": 8.114184397980626e-06, "loss": 0.8389, "step": 253410 }, { "epoch": 4.449165188995593, "grad_norm": 0.06508353985604418, "learning_rate": 8.11032947233817e-06, "loss": 0.8512, "step": 253420 }, { "epoch": 4.449340753875595, "grad_norm": 0.051086824338451735, "learning_rate": 8.106475723625664e-06, "loss": 0.8382, "step": 253430 }, { "epoch": 4.449516318755596, "grad_norm": 0.05505812923507727, "learning_rate": 8.102623151891914e-06, "loss": 0.8372, "step": 253440 }, { "epoch": 4.449691883635597, "grad_norm": 0.05176933707280488, "learning_rate": 8.098771757185725e-06, "loss": 0.8476, "step": 253450 }, { "epoch": 4.449867448515599, "grad_norm": 0.0547178764172615, "learning_rate": 8.094921539555913e-06, "loss": 0.8425, "step": 253460 }, { "epoch": 4.4500430133956, "grad_norm": 0.054543163193648554, "learning_rate": 8.091072499051248e-06, "loss": 0.8281, "step": 253470 }, { "epoch": 4.450218578275602, "grad_norm": 0.05724473494578292, "learning_rate": 8.087224635720502e-06, "loss": 0.8455, "step": 253480 }, { "epoch": 4.450394143155603, "grad_norm": 0.059174847770914786, "learning_rate": 8.083377949612417e-06, "loss": 0.8451, "step": 253490 }, { "epoch": 4.450569708035604, "grad_norm": 0.055770713103994576, "learning_rate": 8.079532440775748e-06, "loss": 0.8386, "step": 253500 }, { "epoch": 4.450745272915606, "grad_norm": 0.04236396922273869, "learning_rate": 8.075688109259217e-06, "loss": 0.8393, "step": 253510 }, { "epoch": 4.450920837795607, "grad_norm": 0.04947024236040556, "learning_rate": 8.071844955111495e-06, "loss": 0.8402, "step": 253520 }, { "epoch": 4.451096402675609, "grad_norm": 0.04138167583861395, "learning_rate": 8.068002978381349e-06, "loss": 0.8362, "step": 253530 }, { "epoch": 4.45127196755561, "grad_norm": 0.0549308777121658, "learning_rate": 8.064162179117383e-06, "loss": 0.8373, "step": 253540 }, { "epoch": 4.451447532435612, "grad_norm": 0.06198457496529774, "learning_rate": 8.06032255736831e-06, "loss": 0.8368, "step": 253550 }, { "epoch": 4.451623097315613, "grad_norm": 0.048692960729082566, "learning_rate": 8.056484113182763e-06, "loss": 0.8365, "step": 253560 }, { "epoch": 4.451798662195614, "grad_norm": 0.06041423796825397, "learning_rate": 8.052646846609374e-06, "loss": 0.839, "step": 253570 }, { "epoch": 4.451974227075616, "grad_norm": 0.0852688626785839, "learning_rate": 8.048810757696773e-06, "loss": 0.8411, "step": 253580 }, { "epoch": 4.452149791955617, "grad_norm": 0.04919059618678376, "learning_rate": 8.04497584649352e-06, "loss": 0.8348, "step": 253590 }, { "epoch": 4.452325356835619, "grad_norm": 0.05630567402871224, "learning_rate": 8.041142113048265e-06, "loss": 0.8383, "step": 253600 }, { "epoch": 4.45250092171562, "grad_norm": 0.052039007033881306, "learning_rate": 8.037309557409541e-06, "loss": 0.8438, "step": 253610 }, { "epoch": 4.452676486595621, "grad_norm": 0.05574571774647606, "learning_rate": 8.033478179625922e-06, "loss": 0.8346, "step": 253620 }, { "epoch": 4.452852051475623, "grad_norm": 0.043972624481849916, "learning_rate": 8.029647979745977e-06, "loss": 0.8416, "step": 253630 }, { "epoch": 4.453027616355624, "grad_norm": 0.05688775843067392, "learning_rate": 8.025818957818175e-06, "loss": 0.838, "step": 253640 }, { "epoch": 4.453203181235626, "grad_norm": 0.04644066711347084, "learning_rate": 8.0219911138911e-06, "loss": 0.8532, "step": 253650 }, { "epoch": 4.453378746115627, "grad_norm": 0.051726798886964284, "learning_rate": 8.018164448013189e-06, "loss": 0.8343, "step": 253660 }, { "epoch": 4.453554310995629, "grad_norm": 0.04006401692182455, "learning_rate": 8.014338960232959e-06, "loss": 0.8328, "step": 253670 }, { "epoch": 4.45372987587563, "grad_norm": 0.053863293662990905, "learning_rate": 8.010514650598889e-06, "loss": 0.8386, "step": 253680 }, { "epoch": 4.453905440755631, "grad_norm": 0.052369016239497336, "learning_rate": 8.006691519159386e-06, "loss": 0.8432, "step": 253690 }, { "epoch": 4.454081005635633, "grad_norm": 0.05489301568503847, "learning_rate": 8.002869565962964e-06, "loss": 0.8473, "step": 253700 }, { "epoch": 4.454256570515634, "grad_norm": 0.055192559433276035, "learning_rate": 7.999048791057988e-06, "loss": 0.8374, "step": 253710 }, { "epoch": 4.454432135395636, "grad_norm": 0.048221739777525696, "learning_rate": 7.995229194492882e-06, "loss": 0.8328, "step": 253720 }, { "epoch": 4.454607700275637, "grad_norm": 0.06389700473010076, "learning_rate": 7.991410776316062e-06, "loss": 0.8324, "step": 253730 }, { "epoch": 4.454783265155639, "grad_norm": 0.05312736320049464, "learning_rate": 7.987593536575879e-06, "loss": 0.8439, "step": 253740 }, { "epoch": 4.45495883003564, "grad_norm": 0.04896230923543457, "learning_rate": 7.983777475320728e-06, "loss": 0.8409, "step": 253750 }, { "epoch": 4.455134394915641, "grad_norm": 0.05168414843814896, "learning_rate": 7.979962592598916e-06, "loss": 0.841, "step": 253760 }, { "epoch": 4.455309959795643, "grad_norm": 0.05541396790048837, "learning_rate": 7.976148888458834e-06, "loss": 0.8412, "step": 253770 }, { "epoch": 4.455485524675644, "grad_norm": 0.04346581417227673, "learning_rate": 7.97233636294875e-06, "loss": 0.8407, "step": 253780 }, { "epoch": 4.455661089555646, "grad_norm": 0.04703002568446316, "learning_rate": 7.968525016116997e-06, "loss": 0.837, "step": 253790 }, { "epoch": 4.455836654435647, "grad_norm": 0.0594934048509442, "learning_rate": 7.964714848011885e-06, "loss": 0.8369, "step": 253800 }, { "epoch": 4.456012219315648, "grad_norm": 0.052955061000342814, "learning_rate": 7.960905858681643e-06, "loss": 0.8351, "step": 253810 }, { "epoch": 4.45618778419565, "grad_norm": 0.048876992907327776, "learning_rate": 7.957098048174577e-06, "loss": 0.8433, "step": 253820 }, { "epoch": 4.456363349075651, "grad_norm": 0.05492853889005068, "learning_rate": 7.953291416538883e-06, "loss": 0.8399, "step": 253830 }, { "epoch": 4.456538913955653, "grad_norm": 0.042994349682831244, "learning_rate": 7.949485963822825e-06, "loss": 0.8394, "step": 253840 }, { "epoch": 4.456714478835654, "grad_norm": 0.057276780436262935, "learning_rate": 7.94568169007463e-06, "loss": 0.8357, "step": 253850 }, { "epoch": 4.456890043715655, "grad_norm": 0.04012895721559447, "learning_rate": 7.941878595342453e-06, "loss": 0.8373, "step": 253860 }, { "epoch": 4.457065608595657, "grad_norm": 0.06418185329992672, "learning_rate": 7.938076679674533e-06, "loss": 0.8527, "step": 253870 }, { "epoch": 4.457241173475658, "grad_norm": 0.05788198822139475, "learning_rate": 7.93427594311901e-06, "loss": 0.8479, "step": 253880 }, { "epoch": 4.4574167383556595, "grad_norm": 0.04207821420534781, "learning_rate": 7.930476385724043e-06, "loss": 0.8443, "step": 253890 }, { "epoch": 4.457592303235661, "grad_norm": 0.060905498150800576, "learning_rate": 7.926678007537785e-06, "loss": 0.8414, "step": 253900 }, { "epoch": 4.4577678681156625, "grad_norm": 0.14902456383508908, "learning_rate": 7.922880808608343e-06, "loss": 0.8384, "step": 253910 }, { "epoch": 4.4579434329956635, "grad_norm": 0.04809189455593536, "learning_rate": 7.919084788983853e-06, "loss": 0.8359, "step": 253920 }, { "epoch": 4.458118997875665, "grad_norm": 0.0603285470948903, "learning_rate": 7.91528994871237e-06, "loss": 0.8402, "step": 253930 }, { "epoch": 4.4582945627556665, "grad_norm": 0.04468239535240472, "learning_rate": 7.911496287842037e-06, "loss": 0.8491, "step": 253940 }, { "epoch": 4.4584701276356675, "grad_norm": 0.054815539911039504, "learning_rate": 7.907703806420872e-06, "loss": 0.8379, "step": 253950 }, { "epoch": 4.4586456925156694, "grad_norm": 0.06638031433099234, "learning_rate": 7.90391250449693e-06, "loss": 0.8397, "step": 253960 }, { "epoch": 4.4588212573956705, "grad_norm": 0.04495647904352887, "learning_rate": 7.900122382118288e-06, "loss": 0.8409, "step": 253970 }, { "epoch": 4.458996822275672, "grad_norm": 0.04538052214090267, "learning_rate": 7.89633343933291e-06, "loss": 0.8351, "step": 253980 }, { "epoch": 4.459172387155673, "grad_norm": 0.07298584972438256, "learning_rate": 7.892545676188847e-06, "loss": 0.8356, "step": 253990 }, { "epoch": 4.4593479520356745, "grad_norm": 0.10549250938980217, "learning_rate": 7.88875909273407e-06, "loss": 0.8384, "step": 254000 }, { "epoch": 4.459523516915676, "grad_norm": 0.04631578068089807, "learning_rate": 7.88497368901655e-06, "loss": 0.8359, "step": 254010 }, { "epoch": 4.459699081795677, "grad_norm": 0.04601402322724891, "learning_rate": 7.88118946508428e-06, "loss": 0.834, "step": 254020 }, { "epoch": 4.459874646675679, "grad_norm": 0.052822487467334533, "learning_rate": 7.877406420985144e-06, "loss": 0.8389, "step": 254030 }, { "epoch": 4.46005021155568, "grad_norm": 0.049600438143670746, "learning_rate": 7.873624556767156e-06, "loss": 0.8358, "step": 254040 }, { "epoch": 4.460225776435681, "grad_norm": 0.05448336542191109, "learning_rate": 7.869843872478181e-06, "loss": 0.834, "step": 254050 }, { "epoch": 4.460401341315683, "grad_norm": 0.03703100035262663, "learning_rate": 7.866064368166127e-06, "loss": 0.8456, "step": 254060 }, { "epoch": 4.460576906195684, "grad_norm": 0.0542857532079856, "learning_rate": 7.862286043878896e-06, "loss": 0.8343, "step": 254070 }, { "epoch": 4.460752471075686, "grad_norm": 0.0519679122898802, "learning_rate": 7.858508899664347e-06, "loss": 0.8442, "step": 254080 }, { "epoch": 4.460928035955687, "grad_norm": 0.05926567340877031, "learning_rate": 7.854732935570339e-06, "loss": 0.8377, "step": 254090 }, { "epoch": 4.461103600835689, "grad_norm": 0.05879898235358202, "learning_rate": 7.850958151644702e-06, "loss": 0.8358, "step": 254100 }, { "epoch": 4.46127916571569, "grad_norm": 0.06473967846003433, "learning_rate": 7.847184547935296e-06, "loss": 0.8372, "step": 254110 }, { "epoch": 4.461454730595691, "grad_norm": 0.048171953737021704, "learning_rate": 7.843412124489907e-06, "loss": 0.8372, "step": 254120 }, { "epoch": 4.461630295475693, "grad_norm": 0.05935363141194245, "learning_rate": 7.839640881356342e-06, "loss": 0.8441, "step": 254130 }, { "epoch": 4.461805860355694, "grad_norm": 0.059069066389590544, "learning_rate": 7.835870818582392e-06, "loss": 0.8371, "step": 254140 }, { "epoch": 4.461981425235696, "grad_norm": 0.05920948742293633, "learning_rate": 7.832101936215793e-06, "loss": 0.8315, "step": 254150 }, { "epoch": 4.462156990115697, "grad_norm": 0.04630016091246819, "learning_rate": 7.828334234304344e-06, "loss": 0.8537, "step": 254160 }, { "epoch": 4.462332554995698, "grad_norm": 0.04784439137970437, "learning_rate": 7.824567712895737e-06, "loss": 0.8412, "step": 254170 }, { "epoch": 4.4625081198757, "grad_norm": 0.0440660019940757, "learning_rate": 7.820802372037704e-06, "loss": 0.8394, "step": 254180 }, { "epoch": 4.462683684755701, "grad_norm": 0.0484652309084411, "learning_rate": 7.817038211777994e-06, "loss": 0.8344, "step": 254190 }, { "epoch": 4.462859249635703, "grad_norm": 0.051024580571682264, "learning_rate": 7.813275232164233e-06, "loss": 0.841, "step": 254200 }, { "epoch": 4.463034814515704, "grad_norm": 0.07024351353297857, "learning_rate": 7.809513433244152e-06, "loss": 0.8291, "step": 254210 }, { "epoch": 4.463210379395706, "grad_norm": 0.05390775461720687, "learning_rate": 7.80575281506539e-06, "loss": 0.8415, "step": 254220 }, { "epoch": 4.463385944275707, "grad_norm": 0.052189061021326165, "learning_rate": 7.801993377675601e-06, "loss": 0.8441, "step": 254230 }, { "epoch": 4.463561509155708, "grad_norm": 0.08306393701580858, "learning_rate": 7.798235121122437e-06, "loss": 0.836, "step": 254240 }, { "epoch": 4.46373707403571, "grad_norm": 0.0478594611149379, "learning_rate": 7.794478045453476e-06, "loss": 0.839, "step": 254250 }, { "epoch": 4.463912638915711, "grad_norm": 0.06244442585642008, "learning_rate": 7.790722150716353e-06, "loss": 0.8363, "step": 254260 }, { "epoch": 4.464088203795713, "grad_norm": 0.05180210370247561, "learning_rate": 7.78696743695862e-06, "loss": 0.8423, "step": 254270 }, { "epoch": 4.464263768675714, "grad_norm": 0.058940682850389184, "learning_rate": 7.783213904227896e-06, "loss": 0.8384, "step": 254280 }, { "epoch": 4.464439333555715, "grad_norm": 0.04620292695825995, "learning_rate": 7.779461552571712e-06, "loss": 0.8361, "step": 254290 }, { "epoch": 4.464614898435717, "grad_norm": 0.06749595924047568, "learning_rate": 7.775710382037604e-06, "loss": 0.8354, "step": 254300 }, { "epoch": 4.464790463315718, "grad_norm": 0.06280812916085882, "learning_rate": 7.77196039267314e-06, "loss": 0.8381, "step": 254310 }, { "epoch": 4.46496602819572, "grad_norm": 0.04265591537173002, "learning_rate": 7.76821158452578e-06, "loss": 0.8304, "step": 254320 }, { "epoch": 4.465141593075721, "grad_norm": 0.04957870483249826, "learning_rate": 7.764463957643062e-06, "loss": 0.8387, "step": 254330 }, { "epoch": 4.465317157955723, "grad_norm": 0.04220859628809577, "learning_rate": 7.760717512072447e-06, "loss": 0.8358, "step": 254340 }, { "epoch": 4.465492722835724, "grad_norm": 0.048275048728328857, "learning_rate": 7.756972247861406e-06, "loss": 0.8476, "step": 254350 }, { "epoch": 4.465668287715725, "grad_norm": 0.05643791290999846, "learning_rate": 7.753228165057408e-06, "loss": 0.8383, "step": 254360 }, { "epoch": 4.465843852595727, "grad_norm": 0.08182084066815044, "learning_rate": 7.749485263707852e-06, "loss": 0.8364, "step": 254370 }, { "epoch": 4.466019417475728, "grad_norm": 0.05274199476959374, "learning_rate": 7.745743543860222e-06, "loss": 0.8368, "step": 254380 }, { "epoch": 4.46619498235573, "grad_norm": 0.04928010993099302, "learning_rate": 7.742003005561877e-06, "loss": 0.8405, "step": 254390 }, { "epoch": 4.466370547235731, "grad_norm": 0.056194257551149755, "learning_rate": 7.738263648860227e-06, "loss": 0.843, "step": 254400 }, { "epoch": 4.466546112115733, "grad_norm": 0.049498852901331505, "learning_rate": 7.734525473802658e-06, "loss": 0.8439, "step": 254410 }, { "epoch": 4.466721676995734, "grad_norm": 0.04699841958760405, "learning_rate": 7.730788480436521e-06, "loss": 0.8476, "step": 254420 }, { "epoch": 4.466897241875735, "grad_norm": 0.05731494508784165, "learning_rate": 7.727052668809175e-06, "loss": 0.8426, "step": 254430 }, { "epoch": 4.467072806755737, "grad_norm": 0.053011495284430145, "learning_rate": 7.723318038967927e-06, "loss": 0.8391, "step": 254440 }, { "epoch": 4.467248371635738, "grad_norm": 0.04312337912236825, "learning_rate": 7.719584590960138e-06, "loss": 0.8407, "step": 254450 }, { "epoch": 4.46742393651574, "grad_norm": 0.045295759484497725, "learning_rate": 7.715852324833082e-06, "loss": 0.8395, "step": 254460 }, { "epoch": 4.467599501395741, "grad_norm": 0.0601482097493204, "learning_rate": 7.712121240634042e-06, "loss": 0.8422, "step": 254470 }, { "epoch": 4.467775066275742, "grad_norm": 0.051742626955988566, "learning_rate": 7.708391338410336e-06, "loss": 0.8378, "step": 254480 }, { "epoch": 4.467950631155744, "grad_norm": 0.06343848379078056, "learning_rate": 7.704662618209159e-06, "loss": 0.8366, "step": 254490 }, { "epoch": 4.468126196035745, "grad_norm": 0.04716398392354567, "learning_rate": 7.700935080077806e-06, "loss": 0.8363, "step": 254500 }, { "epoch": 4.468301760915747, "grad_norm": 0.05128188363420445, "learning_rate": 7.697208724063475e-06, "loss": 0.8366, "step": 254510 }, { "epoch": 4.468477325795748, "grad_norm": 0.0404099532543392, "learning_rate": 7.693483550213397e-06, "loss": 0.8408, "step": 254520 }, { "epoch": 4.468652890675749, "grad_norm": 0.06589654884758527, "learning_rate": 7.689759558574774e-06, "loss": 0.8448, "step": 254530 }, { "epoch": 4.468828455555751, "grad_norm": 0.04719089672857979, "learning_rate": 7.686036749194752e-06, "loss": 0.8452, "step": 254540 }, { "epoch": 4.469004020435752, "grad_norm": 0.041584216833093185, "learning_rate": 7.682315122120557e-06, "loss": 0.8441, "step": 254550 }, { "epoch": 4.469179585315754, "grad_norm": 0.06166676754229799, "learning_rate": 7.678594677399299e-06, "loss": 0.8311, "step": 254560 }, { "epoch": 4.469355150195755, "grad_norm": 0.04973056863631194, "learning_rate": 7.67487541507813e-06, "loss": 0.8402, "step": 254570 }, { "epoch": 4.469530715075757, "grad_norm": 0.05066015192234843, "learning_rate": 7.67115733520418e-06, "loss": 0.8329, "step": 254580 }, { "epoch": 4.469706279955758, "grad_norm": 0.047129951309531765, "learning_rate": 7.667440437824557e-06, "loss": 0.8356, "step": 254590 }, { "epoch": 4.469881844835759, "grad_norm": 0.06626886701802695, "learning_rate": 7.66372472298635e-06, "loss": 0.8465, "step": 254600 }, { "epoch": 4.470057409715761, "grad_norm": 0.059540565649613124, "learning_rate": 7.660010190736616e-06, "loss": 0.8408, "step": 254610 }, { "epoch": 4.470232974595762, "grad_norm": 0.05241816160942367, "learning_rate": 7.656296841122474e-06, "loss": 0.8355, "step": 254620 }, { "epoch": 4.4704085394757636, "grad_norm": 0.06861513042550292, "learning_rate": 7.652584674190924e-06, "loss": 0.8364, "step": 254630 }, { "epoch": 4.470584104355765, "grad_norm": 0.0779868395926562, "learning_rate": 7.648873689989014e-06, "loss": 0.8456, "step": 254640 }, { "epoch": 4.4707596692357665, "grad_norm": 0.04310760822069281, "learning_rate": 7.645163888563772e-06, "loss": 0.843, "step": 254650 }, { "epoch": 4.4709352341157675, "grad_norm": 0.053634464764067476, "learning_rate": 7.641455269962189e-06, "loss": 0.8388, "step": 254660 }, { "epoch": 4.471110798995769, "grad_norm": 0.052118958698317105, "learning_rate": 7.637747834231272e-06, "loss": 0.8385, "step": 254670 }, { "epoch": 4.4712863638757705, "grad_norm": 0.05924436970875331, "learning_rate": 7.634041581417974e-06, "loss": 0.8344, "step": 254680 }, { "epoch": 4.4714619287557715, "grad_norm": 0.06919418811809365, "learning_rate": 7.630336511569256e-06, "loss": 0.8432, "step": 254690 }, { "epoch": 4.4716374936357735, "grad_norm": 0.04782795548943559, "learning_rate": 7.626632624732086e-06, "loss": 0.8432, "step": 254700 }, { "epoch": 4.4718130585157745, "grad_norm": 0.04418823679817901, "learning_rate": 7.622929920953345e-06, "loss": 0.8361, "step": 254710 }, { "epoch": 4.4719886233957755, "grad_norm": 0.0572327535518679, "learning_rate": 7.619228400280011e-06, "loss": 0.8332, "step": 254720 }, { "epoch": 4.472164188275777, "grad_norm": 0.048763449070432806, "learning_rate": 7.615528062758925e-06, "loss": 0.8388, "step": 254730 }, { "epoch": 4.4723397531557785, "grad_norm": 0.07420942525092306, "learning_rate": 7.611828908437002e-06, "loss": 0.8389, "step": 254740 }, { "epoch": 4.47251531803578, "grad_norm": 0.053060907124893156, "learning_rate": 7.608130937361128e-06, "loss": 0.84, "step": 254750 }, { "epoch": 4.472690882915781, "grad_norm": 0.0621892941526554, "learning_rate": 7.604434149578103e-06, "loss": 0.8373, "step": 254760 }, { "epoch": 4.4728664477957825, "grad_norm": 0.05250844579056338, "learning_rate": 7.600738545134817e-06, "loss": 0.8414, "step": 254770 }, { "epoch": 4.473042012675784, "grad_norm": 0.049170066925130596, "learning_rate": 7.597044124078047e-06, "loss": 0.8411, "step": 254780 }, { "epoch": 4.473217577555785, "grad_norm": 0.051947615848269485, "learning_rate": 7.59335088645466e-06, "loss": 0.8408, "step": 254790 }, { "epoch": 4.473393142435787, "grad_norm": 0.04619302549727149, "learning_rate": 7.5896588323114025e-06, "loss": 0.8441, "step": 254800 }, { "epoch": 4.473568707315788, "grad_norm": 0.0455508762012442, "learning_rate": 7.585967961695063e-06, "loss": 0.8447, "step": 254810 }, { "epoch": 4.47374427219579, "grad_norm": 0.03993761752180329, "learning_rate": 7.582278274652432e-06, "loss": 0.8417, "step": 254820 }, { "epoch": 4.473919837075791, "grad_norm": 0.05224660370523509, "learning_rate": 7.578589771230221e-06, "loss": 0.8373, "step": 254830 }, { "epoch": 4.474095401955792, "grad_norm": 0.044344446625762875, "learning_rate": 7.5749024514752e-06, "loss": 0.8383, "step": 254840 }, { "epoch": 4.474270966835794, "grad_norm": 0.05719184697952422, "learning_rate": 7.571216315434057e-06, "loss": 0.8383, "step": 254850 }, { "epoch": 4.474446531715795, "grad_norm": 0.07104062733701597, "learning_rate": 7.567531363153505e-06, "loss": 0.8414, "step": 254860 }, { "epoch": 4.474622096595797, "grad_norm": 0.04558334638073091, "learning_rate": 7.563847594680245e-06, "loss": 0.8303, "step": 254870 }, { "epoch": 4.474797661475798, "grad_norm": 0.05759443615785554, "learning_rate": 7.560165010060927e-06, "loss": 0.8365, "step": 254880 }, { "epoch": 4.4749732263558, "grad_norm": 0.0491343450027992, "learning_rate": 7.556483609342249e-06, "loss": 0.8313, "step": 254890 }, { "epoch": 4.475148791235801, "grad_norm": 0.056769853226541156, "learning_rate": 7.5528033925708176e-06, "loss": 0.8415, "step": 254900 }, { "epoch": 4.475324356115802, "grad_norm": 0.06066874930463641, "learning_rate": 7.5491243597932745e-06, "loss": 0.8343, "step": 254910 }, { "epoch": 4.475499920995804, "grad_norm": 0.06437935103633036, "learning_rate": 7.545446511056248e-06, "loss": 0.8451, "step": 254920 }, { "epoch": 4.475675485875805, "grad_norm": 0.05545811362174651, "learning_rate": 7.541769846406319e-06, "loss": 0.8416, "step": 254930 }, { "epoch": 4.475851050755807, "grad_norm": 0.049657140929998934, "learning_rate": 7.538094365890078e-06, "loss": 0.8336, "step": 254940 }, { "epoch": 4.476026615635808, "grad_norm": 0.05973230390329226, "learning_rate": 7.5344200695540714e-06, "loss": 0.8449, "step": 254950 }, { "epoch": 4.476202180515809, "grad_norm": 0.052225820834596315, "learning_rate": 7.530746957444915e-06, "loss": 0.8378, "step": 254960 }, { "epoch": 4.476377745395811, "grad_norm": 0.06065782676768428, "learning_rate": 7.527075029609079e-06, "loss": 0.8251, "step": 254970 }, { "epoch": 4.476553310275812, "grad_norm": 0.05914358696276536, "learning_rate": 7.523404286093122e-06, "loss": 0.8386, "step": 254980 }, { "epoch": 4.476728875155814, "grad_norm": 0.05552500868340997, "learning_rate": 7.5197347269435584e-06, "loss": 0.8292, "step": 254990 }, { "epoch": 4.476904440035815, "grad_norm": 0.04826672228372833, "learning_rate": 7.5160663522068605e-06, "loss": 0.8356, "step": 255000 }, { "epoch": 4.477080004915817, "grad_norm": 0.04569417570607637, "learning_rate": 7.5123991619295315e-06, "loss": 0.8383, "step": 255010 }, { "epoch": 4.477255569795818, "grad_norm": 0.06090843828911807, "learning_rate": 7.508733156158007e-06, "loss": 0.8322, "step": 255020 }, { "epoch": 4.477431134675819, "grad_norm": 0.04114157636009874, "learning_rate": 7.5050683349387506e-06, "loss": 0.8343, "step": 255030 }, { "epoch": 4.477606699555821, "grad_norm": 0.05419820967203716, "learning_rate": 7.50140469831821e-06, "loss": 0.8328, "step": 255040 }, { "epoch": 4.477782264435822, "grad_norm": 0.04454911257545706, "learning_rate": 7.497742246342765e-06, "loss": 0.8404, "step": 255050 }, { "epoch": 4.477957829315824, "grad_norm": 0.06237324970619175, "learning_rate": 7.494080979058879e-06, "loss": 0.8421, "step": 255060 }, { "epoch": 4.478133394195825, "grad_norm": 0.04793649211583569, "learning_rate": 7.490420896512878e-06, "loss": 0.8381, "step": 255070 }, { "epoch": 4.478308959075827, "grad_norm": 0.047572472367698126, "learning_rate": 7.486761998751182e-06, "loss": 0.8423, "step": 255080 }, { "epoch": 4.478484523955828, "grad_norm": 0.05393240974228496, "learning_rate": 7.483104285820134e-06, "loss": 0.8422, "step": 255090 }, { "epoch": 4.478660088835829, "grad_norm": 0.04328795310952077, "learning_rate": 7.479447757766067e-06, "loss": 0.8359, "step": 255100 }, { "epoch": 4.478835653715831, "grad_norm": 0.046103197596077165, "learning_rate": 7.475792414635341e-06, "loss": 0.8327, "step": 255110 }, { "epoch": 4.479011218595832, "grad_norm": 0.06866609180353592, "learning_rate": 7.472138256474216e-06, "loss": 0.838, "step": 255120 }, { "epoch": 4.479186783475834, "grad_norm": 0.05535717263391965, "learning_rate": 7.468485283329043e-06, "loss": 0.8329, "step": 255130 }, { "epoch": 4.479362348355835, "grad_norm": 0.05054502574334718, "learning_rate": 7.464833495246088e-06, "loss": 0.8351, "step": 255140 }, { "epoch": 4.479537913235836, "grad_norm": 0.05036789939910729, "learning_rate": 7.461182892271605e-06, "loss": 0.8313, "step": 255150 }, { "epoch": 4.479713478115838, "grad_norm": 0.045986389542916936, "learning_rate": 7.457533474451883e-06, "loss": 0.8362, "step": 255160 }, { "epoch": 4.479889042995839, "grad_norm": 0.055569411893894416, "learning_rate": 7.453885241833131e-06, "loss": 0.8336, "step": 255170 }, { "epoch": 4.480064607875841, "grad_norm": 0.044487491445766414, "learning_rate": 7.450238194461584e-06, "loss": 0.8329, "step": 255180 }, { "epoch": 4.480240172755842, "grad_norm": 0.0665461487244328, "learning_rate": 7.446592332383442e-06, "loss": 0.8374, "step": 255190 }, { "epoch": 4.480415737635843, "grad_norm": 0.05051503162753059, "learning_rate": 7.442947655644898e-06, "loss": 0.8495, "step": 255200 }, { "epoch": 4.480591302515845, "grad_norm": 0.061300787824120795, "learning_rate": 7.4393041642921505e-06, "loss": 0.8396, "step": 255210 }, { "epoch": 4.480766867395846, "grad_norm": 0.04962279232018603, "learning_rate": 7.435661858371329e-06, "loss": 0.843, "step": 255220 }, { "epoch": 4.480942432275848, "grad_norm": 0.05082861687538463, "learning_rate": 7.432020737928627e-06, "loss": 0.8424, "step": 255230 }, { "epoch": 4.481117997155849, "grad_norm": 0.04943769915642633, "learning_rate": 7.428380803010146e-06, "loss": 0.8352, "step": 255240 }, { "epoch": 4.481293562035851, "grad_norm": 0.052502983779781444, "learning_rate": 7.424742053662002e-06, "loss": 0.84, "step": 255250 }, { "epoch": 4.481469126915852, "grad_norm": 0.04795331893837094, "learning_rate": 7.4211044899303374e-06, "loss": 0.8465, "step": 255260 }, { "epoch": 4.481644691795853, "grad_norm": 0.04658234366212989, "learning_rate": 7.417468111861184e-06, "loss": 0.8286, "step": 255270 }, { "epoch": 4.481820256675855, "grad_norm": 0.04289715128108351, "learning_rate": 7.413832919500668e-06, "loss": 0.8406, "step": 255280 }, { "epoch": 4.481995821555856, "grad_norm": 0.04824998492350425, "learning_rate": 7.410198912894792e-06, "loss": 0.8376, "step": 255290 }, { "epoch": 4.482171386435858, "grad_norm": 0.056891014159961076, "learning_rate": 7.406566092089665e-06, "loss": 0.8467, "step": 255300 }, { "epoch": 4.482346951315859, "grad_norm": 0.05505759467178569, "learning_rate": 7.402934457131268e-06, "loss": 0.8392, "step": 255310 }, { "epoch": 4.482522516195861, "grad_norm": 0.04958214682778057, "learning_rate": 7.399304008065627e-06, "loss": 0.839, "step": 255320 }, { "epoch": 4.482698081075862, "grad_norm": 0.05021894091492862, "learning_rate": 7.395674744938761e-06, "loss": 0.8375, "step": 255330 }, { "epoch": 4.482873645955863, "grad_norm": 0.05823853498363734, "learning_rate": 7.392046667796621e-06, "loss": 0.8343, "step": 255340 }, { "epoch": 4.483049210835865, "grad_norm": 0.05296831453951765, "learning_rate": 7.388419776685197e-06, "loss": 0.8396, "step": 255350 }, { "epoch": 4.483224775715866, "grad_norm": 0.05773851110370642, "learning_rate": 7.384794071650422e-06, "loss": 0.837, "step": 255360 }, { "epoch": 4.483400340595868, "grad_norm": 0.05338566651961475, "learning_rate": 7.38116955273826e-06, "loss": 0.8408, "step": 255370 }, { "epoch": 4.483575905475869, "grad_norm": 0.051773996754601974, "learning_rate": 7.377546219994622e-06, "loss": 0.8313, "step": 255380 }, { "epoch": 4.48375147035587, "grad_norm": 0.04472560531842852, "learning_rate": 7.373924073465394e-06, "loss": 0.8458, "step": 255390 }, { "epoch": 4.4839270352358715, "grad_norm": 0.05023178753786063, "learning_rate": 7.370303113196511e-06, "loss": 0.8388, "step": 255400 }, { "epoch": 4.484102600115873, "grad_norm": 0.11519708502795915, "learning_rate": 7.366683339233827e-06, "loss": 0.8343, "step": 255410 }, { "epoch": 4.4842781649958745, "grad_norm": 0.039773597837798394, "learning_rate": 7.363064751623208e-06, "loss": 0.8318, "step": 255420 }, { "epoch": 4.4844537298758755, "grad_norm": 0.049762467144542875, "learning_rate": 7.359447350410511e-06, "loss": 0.8467, "step": 255430 }, { "epoch": 4.484629294755877, "grad_norm": 0.0626546995706494, "learning_rate": 7.355831135641557e-06, "loss": 0.8426, "step": 255440 }, { "epoch": 4.4848048596358785, "grad_norm": 0.04558441728764477, "learning_rate": 7.352216107362179e-06, "loss": 0.8319, "step": 255450 }, { "epoch": 4.4849804245158795, "grad_norm": 0.051464664709201684, "learning_rate": 7.3486022656181455e-06, "loss": 0.8424, "step": 255460 }, { "epoch": 4.4851559893958814, "grad_norm": 0.04606643469924806, "learning_rate": 7.344989610455301e-06, "loss": 0.8402, "step": 255470 }, { "epoch": 4.4853315542758825, "grad_norm": 0.05492175872110588, "learning_rate": 7.341378141919367e-06, "loss": 0.8413, "step": 255480 }, { "epoch": 4.485507119155884, "grad_norm": 0.061908878440358174, "learning_rate": 7.3377678600561246e-06, "loss": 0.8447, "step": 255490 }, { "epoch": 4.485682684035885, "grad_norm": 0.04327613454515917, "learning_rate": 7.334158764911316e-06, "loss": 0.8434, "step": 255500 }, { "epoch": 4.4858582489158865, "grad_norm": 0.053215428826216216, "learning_rate": 7.330550856530667e-06, "loss": 0.8383, "step": 255510 }, { "epoch": 4.486033813795888, "grad_norm": 0.06568867083857424, "learning_rate": 7.326944134959901e-06, "loss": 0.8383, "step": 255520 }, { "epoch": 4.486209378675889, "grad_norm": 0.05054331436279004, "learning_rate": 7.323338600244683e-06, "loss": 0.8407, "step": 255530 }, { "epoch": 4.486384943555891, "grad_norm": 0.057141060643684016, "learning_rate": 7.319734252430732e-06, "loss": 0.8361, "step": 255540 }, { "epoch": 4.486560508435892, "grad_norm": 0.04818481912382552, "learning_rate": 7.31613109156371e-06, "loss": 0.8378, "step": 255550 }, { "epoch": 4.486736073315894, "grad_norm": 0.05723591141491616, "learning_rate": 7.312529117689225e-06, "loss": 0.8384, "step": 255560 }, { "epoch": 4.486911638195895, "grad_norm": 0.05754925302362289, "learning_rate": 7.308928330852985e-06, "loss": 0.8382, "step": 255570 }, { "epoch": 4.487087203075896, "grad_norm": 0.06549926957991233, "learning_rate": 7.305328731100572e-06, "loss": 0.8385, "step": 255580 }, { "epoch": 4.487262767955898, "grad_norm": 0.0423055545875055, "learning_rate": 7.30173031847759e-06, "loss": 0.8366, "step": 255590 }, { "epoch": 4.487438332835899, "grad_norm": 0.05785470601671392, "learning_rate": 7.298133093029661e-06, "loss": 0.8396, "step": 255600 }, { "epoch": 4.487613897715901, "grad_norm": 0.0460079433049, "learning_rate": 7.294537054802335e-06, "loss": 0.8372, "step": 255610 }, { "epoch": 4.487789462595902, "grad_norm": 0.05923979070029029, "learning_rate": 7.290942203841191e-06, "loss": 0.8374, "step": 255620 }, { "epoch": 4.487965027475903, "grad_norm": 0.058885527150874264, "learning_rate": 7.287348540191733e-06, "loss": 0.8342, "step": 255630 }, { "epoch": 4.488140592355905, "grad_norm": 0.053229197821959465, "learning_rate": 7.283756063899573e-06, "loss": 0.8347, "step": 255640 }, { "epoch": 4.488316157235906, "grad_norm": 0.04770885119615419, "learning_rate": 7.280164775010162e-06, "loss": 0.8358, "step": 255650 }, { "epoch": 4.488491722115908, "grad_norm": 0.049742767255494424, "learning_rate": 7.2765746735690256e-06, "loss": 0.8337, "step": 255660 }, { "epoch": 4.488667286995909, "grad_norm": 0.0612008300224235, "learning_rate": 7.272985759621655e-06, "loss": 0.8429, "step": 255670 }, { "epoch": 4.488842851875911, "grad_norm": 0.051156290467532714, "learning_rate": 7.269398033213509e-06, "loss": 0.8336, "step": 255680 }, { "epoch": 4.489018416755912, "grad_norm": 0.05778683967356671, "learning_rate": 7.2658114943900734e-06, "loss": 0.846, "step": 255690 }, { "epoch": 4.489193981635913, "grad_norm": 0.05964509638903737, "learning_rate": 7.26222614319676e-06, "loss": 0.8265, "step": 255700 }, { "epoch": 4.489369546515915, "grad_norm": 0.051774304564097436, "learning_rate": 7.258641979678995e-06, "loss": 0.8385, "step": 255710 }, { "epoch": 4.489545111395916, "grad_norm": 0.050392410788030965, "learning_rate": 7.2550590038822185e-06, "loss": 0.8261, "step": 255720 }, { "epoch": 4.489720676275918, "grad_norm": 0.044864083193910914, "learning_rate": 7.251477215851779e-06, "loss": 0.8384, "step": 255730 }, { "epoch": 4.489896241155919, "grad_norm": 0.054470403374418545, "learning_rate": 7.247896615633126e-06, "loss": 0.8438, "step": 255740 }, { "epoch": 4.490071806035921, "grad_norm": 0.04708838241789794, "learning_rate": 7.244317203271575e-06, "loss": 0.8441, "step": 255750 }, { "epoch": 4.490247370915922, "grad_norm": 0.05650908326730408, "learning_rate": 7.240738978812489e-06, "loss": 0.8297, "step": 255760 }, { "epoch": 4.490422935795923, "grad_norm": 0.04161941036077219, "learning_rate": 7.237161942301225e-06, "loss": 0.8406, "step": 255770 }, { "epoch": 4.490598500675925, "grad_norm": 0.050185504585723774, "learning_rate": 7.233586093783082e-06, "loss": 0.8392, "step": 255780 }, { "epoch": 4.490774065555926, "grad_norm": 0.057389684592258794, "learning_rate": 7.230011433303371e-06, "loss": 0.837, "step": 255790 }, { "epoch": 4.490949630435928, "grad_norm": 0.046990165657880445, "learning_rate": 7.226437960907373e-06, "loss": 0.8416, "step": 255800 }, { "epoch": 4.491125195315929, "grad_norm": 0.0606186292556797, "learning_rate": 7.222865676640397e-06, "loss": 0.8341, "step": 255810 }, { "epoch": 4.49130076019593, "grad_norm": 0.051649262310980486, "learning_rate": 7.219294580547675e-06, "loss": 0.8287, "step": 255820 }, { "epoch": 4.491476325075932, "grad_norm": 0.05138523538232476, "learning_rate": 7.215724672674459e-06, "loss": 0.8485, "step": 255830 }, { "epoch": 4.491651889955933, "grad_norm": 0.05226174916804882, "learning_rate": 7.21215595306601e-06, "loss": 0.8363, "step": 255840 }, { "epoch": 4.491827454835935, "grad_norm": 0.050133683877151405, "learning_rate": 7.208588421767501e-06, "loss": 0.8359, "step": 255850 }, { "epoch": 4.492003019715936, "grad_norm": 0.04887552434030501, "learning_rate": 7.205022078824162e-06, "loss": 0.8474, "step": 255860 }, { "epoch": 4.492178584595937, "grad_norm": 0.04379196863365765, "learning_rate": 7.201456924281153e-06, "loss": 0.8391, "step": 255870 }, { "epoch": 4.492354149475939, "grad_norm": 0.05925854767449463, "learning_rate": 7.197892958183663e-06, "loss": 0.8336, "step": 255880 }, { "epoch": 4.49252971435594, "grad_norm": 0.0512911476588675, "learning_rate": 7.194330180576864e-06, "loss": 0.8306, "step": 255890 }, { "epoch": 4.492705279235942, "grad_norm": 0.045400874318134395, "learning_rate": 7.1907685915058505e-06, "loss": 0.8487, "step": 255900 }, { "epoch": 4.492880844115943, "grad_norm": 0.05542115891179121, "learning_rate": 7.1872081910158016e-06, "loss": 0.8306, "step": 255910 }, { "epoch": 4.493056408995945, "grad_norm": 0.06926026678510917, "learning_rate": 7.183648979151797e-06, "loss": 0.8441, "step": 255920 }, { "epoch": 4.493231973875946, "grad_norm": 0.05275086816365971, "learning_rate": 7.180090955958928e-06, "loss": 0.8439, "step": 255930 }, { "epoch": 4.493407538755947, "grad_norm": 0.048170021603165664, "learning_rate": 7.176534121482312e-06, "loss": 0.8362, "step": 255940 }, { "epoch": 4.493583103635949, "grad_norm": 0.054536695784744356, "learning_rate": 7.172978475766978e-06, "loss": 0.8326, "step": 255950 }, { "epoch": 4.49375866851595, "grad_norm": 0.06815143197472791, "learning_rate": 7.169424018857991e-06, "loss": 0.8408, "step": 255960 }, { "epoch": 4.493934233395952, "grad_norm": 0.052135613879880485, "learning_rate": 7.165870750800362e-06, "loss": 0.8432, "step": 255970 }, { "epoch": 4.494109798275953, "grad_norm": 0.060537115023781875, "learning_rate": 7.162318671639162e-06, "loss": 0.8394, "step": 255980 }, { "epoch": 4.494285363155955, "grad_norm": 0.053544698758368224, "learning_rate": 7.158767781419351e-06, "loss": 0.8432, "step": 255990 }, { "epoch": 4.494460928035956, "grad_norm": 0.05326287148683299, "learning_rate": 7.1552180801859424e-06, "loss": 0.8364, "step": 256000 }, { "epoch": 4.494636492915957, "grad_norm": 0.042435971676999494, "learning_rate": 7.151669567983915e-06, "loss": 0.8434, "step": 256010 }, { "epoch": 4.494812057795959, "grad_norm": 0.05423564773219382, "learning_rate": 7.148122244858213e-06, "loss": 0.8404, "step": 256020 }, { "epoch": 4.49498762267596, "grad_norm": 0.05647859325718329, "learning_rate": 7.144576110853801e-06, "loss": 0.8345, "step": 256030 }, { "epoch": 4.495163187555962, "grad_norm": 0.0719610189803458, "learning_rate": 7.1410311660155774e-06, "loss": 0.8371, "step": 256040 }, { "epoch": 4.495338752435963, "grad_norm": 0.045628010120614335, "learning_rate": 7.1374874103884744e-06, "loss": 0.8457, "step": 256050 }, { "epoch": 4.495514317315964, "grad_norm": 0.05707883602332175, "learning_rate": 7.1339448440174235e-06, "loss": 0.8453, "step": 256060 }, { "epoch": 4.495689882195966, "grad_norm": 0.06225395815917709, "learning_rate": 7.130403466947236e-06, "loss": 0.8331, "step": 256070 }, { "epoch": 4.495865447075967, "grad_norm": 0.04740149036214051, "learning_rate": 7.126863279222866e-06, "loss": 0.8404, "step": 256080 }, { "epoch": 4.496041011955969, "grad_norm": 0.05279562726626463, "learning_rate": 7.123324280889104e-06, "loss": 0.8305, "step": 256090 }, { "epoch": 4.49621657683597, "grad_norm": 0.08789035149046412, "learning_rate": 7.119786471990826e-06, "loss": 0.8407, "step": 256100 }, { "epoch": 4.496392141715971, "grad_norm": 0.05264396270661553, "learning_rate": 7.116249852572854e-06, "loss": 0.8414, "step": 256110 }, { "epoch": 4.496567706595973, "grad_norm": 0.050110183968291776, "learning_rate": 7.112714422679978e-06, "loss": 0.8395, "step": 256120 }, { "epoch": 4.496743271475974, "grad_norm": 0.06477057860517081, "learning_rate": 7.109180182357018e-06, "loss": 0.8377, "step": 256130 }, { "epoch": 4.496918836355976, "grad_norm": 0.053923089096688484, "learning_rate": 7.105647131648711e-06, "loss": 0.8366, "step": 256140 }, { "epoch": 4.497094401235977, "grad_norm": 0.054245120974975164, "learning_rate": 7.102115270599867e-06, "loss": 0.8345, "step": 256150 }, { "epoch": 4.4972699661159785, "grad_norm": 0.046856269149141296, "learning_rate": 7.09858459925522e-06, "loss": 0.842, "step": 256160 }, { "epoch": 4.4974455309959795, "grad_norm": 0.05950618345127812, "learning_rate": 7.095055117659494e-06, "loss": 0.8366, "step": 256170 }, { "epoch": 4.497621095875981, "grad_norm": 0.05208864017073413, "learning_rate": 7.091526825857434e-06, "loss": 0.8473, "step": 256180 }, { "epoch": 4.4977966607559825, "grad_norm": 0.049049744768569506, "learning_rate": 7.087999723893706e-06, "loss": 0.8415, "step": 256190 }, { "epoch": 4.4979722256359835, "grad_norm": 0.0480585542284058, "learning_rate": 7.084473811813036e-06, "loss": 0.8406, "step": 256200 }, { "epoch": 4.4981477905159855, "grad_norm": 0.05938560044637286, "learning_rate": 7.080949089660069e-06, "loss": 0.8362, "step": 256210 }, { "epoch": 4.4983233553959865, "grad_norm": 0.049384722219456, "learning_rate": 7.077425557479474e-06, "loss": 0.8444, "step": 256220 }, { "epoch": 4.498498920275988, "grad_norm": 0.053059751106242226, "learning_rate": 7.073903215315919e-06, "loss": 0.8396, "step": 256230 }, { "epoch": 4.4986744851559894, "grad_norm": 0.06895449764633586, "learning_rate": 7.070382063213973e-06, "loss": 0.8301, "step": 256240 }, { "epoch": 4.4988500500359905, "grad_norm": 0.045128134865926685, "learning_rate": 7.066862101218326e-06, "loss": 0.8414, "step": 256250 }, { "epoch": 4.499025614915992, "grad_norm": 0.046689966038680815, "learning_rate": 7.063343329373515e-06, "loss": 0.8399, "step": 256260 }, { "epoch": 4.499201179795993, "grad_norm": 0.0557676315796469, "learning_rate": 7.059825747724154e-06, "loss": 0.8436, "step": 256270 }, { "epoch": 4.499376744675995, "grad_norm": 0.05922666965314246, "learning_rate": 7.056309356314811e-06, "loss": 0.8308, "step": 256280 }, { "epoch": 4.499552309555996, "grad_norm": 0.04995490163472403, "learning_rate": 7.052794155190023e-06, "loss": 0.8419, "step": 256290 }, { "epoch": 4.499727874435997, "grad_norm": 0.044885034597582404, "learning_rate": 7.04928014439436e-06, "loss": 0.8479, "step": 256300 }, { "epoch": 4.499903439315999, "grad_norm": 0.061715946372538945, "learning_rate": 7.04576732397228e-06, "loss": 0.8357, "step": 256310 }, { "epoch": 4.500079004196, "grad_norm": 0.05146210297199584, "learning_rate": 7.042255693968375e-06, "loss": 0.8384, "step": 256320 }, { "epoch": 4.500254569076002, "grad_norm": 0.05263534762469842, "learning_rate": 7.0387452544270716e-06, "loss": 0.8392, "step": 256330 }, { "epoch": 4.500430133956003, "grad_norm": 0.0534365899421023, "learning_rate": 7.035236005392884e-06, "loss": 0.8373, "step": 256340 }, { "epoch": 4.500605698836004, "grad_norm": 0.04757211941145551, "learning_rate": 7.031727946910272e-06, "loss": 0.842, "step": 256350 }, { "epoch": 4.500781263716006, "grad_norm": 0.061775595939195176, "learning_rate": 7.028221079023673e-06, "loss": 0.8422, "step": 256360 }, { "epoch": 4.500956828596007, "grad_norm": 0.05768431926189581, "learning_rate": 7.024715401777524e-06, "loss": 0.8404, "step": 256370 }, { "epoch": 4.501132393476009, "grad_norm": 0.05301977199770704, "learning_rate": 7.021210915216228e-06, "loss": 0.8403, "step": 256380 }, { "epoch": 4.50130795835601, "grad_norm": 0.04954471800416144, "learning_rate": 7.017707619384205e-06, "loss": 0.8349, "step": 256390 }, { "epoch": 4.501483523236012, "grad_norm": 0.04992691896497576, "learning_rate": 7.014205514325844e-06, "loss": 0.838, "step": 256400 }, { "epoch": 4.501659088116013, "grad_norm": 0.04230333265162784, "learning_rate": 7.010704600085496e-06, "loss": 0.8424, "step": 256410 }, { "epoch": 4.501834652996015, "grad_norm": 0.049443188377669166, "learning_rate": 7.007204876707557e-06, "loss": 0.8289, "step": 256420 }, { "epoch": 4.502010217876016, "grad_norm": 0.044395483391339204, "learning_rate": 7.003706344236327e-06, "loss": 0.8389, "step": 256430 }, { "epoch": 4.502185782756017, "grad_norm": 0.06956235684668577, "learning_rate": 7.000209002716161e-06, "loss": 0.8404, "step": 256440 }, { "epoch": 4.502361347636019, "grad_norm": 0.05119786093623953, "learning_rate": 6.9967128521913616e-06, "loss": 0.8335, "step": 256450 }, { "epoch": 4.50253691251602, "grad_norm": 0.04903869541373508, "learning_rate": 6.993217892706225e-06, "loss": 0.8383, "step": 256460 }, { "epoch": 4.502712477396022, "grad_norm": 0.046745633567219017, "learning_rate": 6.9897241243050445e-06, "loss": 0.8347, "step": 256470 }, { "epoch": 4.502888042276023, "grad_norm": 0.0529948217363045, "learning_rate": 6.986231547032048e-06, "loss": 0.84, "step": 256480 }, { "epoch": 4.503063607156024, "grad_norm": 0.049175746402470084, "learning_rate": 6.9827401609315415e-06, "loss": 0.8395, "step": 256490 }, { "epoch": 4.503239172036026, "grad_norm": 0.043785128824621766, "learning_rate": 6.979249966047724e-06, "loss": 0.8399, "step": 256500 }, { "epoch": 4.503414736916027, "grad_norm": 0.04050696563359873, "learning_rate": 6.975760962424829e-06, "loss": 0.8475, "step": 256510 }, { "epoch": 4.503590301796029, "grad_norm": 0.05925516788017181, "learning_rate": 6.9722731501070696e-06, "loss": 0.8371, "step": 256520 }, { "epoch": 4.50376586667603, "grad_norm": 0.0660211057077375, "learning_rate": 6.968786529138627e-06, "loss": 0.8397, "step": 256530 }, { "epoch": 4.503941431556031, "grad_norm": 0.059159057246673176, "learning_rate": 6.965301099563684e-06, "loss": 0.8426, "step": 256540 }, { "epoch": 4.504116996436033, "grad_norm": 0.07019331874694139, "learning_rate": 6.961816861426386e-06, "loss": 0.8426, "step": 256550 }, { "epoch": 4.504292561316034, "grad_norm": 0.08141681240617951, "learning_rate": 6.958333814770892e-06, "loss": 0.8415, "step": 256560 }, { "epoch": 4.504468126196036, "grad_norm": 0.06151150996389767, "learning_rate": 6.954851959641345e-06, "loss": 0.8388, "step": 256570 }, { "epoch": 4.504643691076037, "grad_norm": 0.06006422209439365, "learning_rate": 6.951371296081828e-06, "loss": 0.8359, "step": 256580 }, { "epoch": 4.504819255956039, "grad_norm": 0.04776295514034343, "learning_rate": 6.947891824136474e-06, "loss": 0.8403, "step": 256590 }, { "epoch": 4.50499482083604, "grad_norm": 0.059378217080155886, "learning_rate": 6.944413543849354e-06, "loss": 0.8375, "step": 256600 }, { "epoch": 4.505170385716041, "grad_norm": 0.0844133505663742, "learning_rate": 6.940936455264535e-06, "loss": 0.8314, "step": 256610 }, { "epoch": 4.505345950596043, "grad_norm": 0.05527674088318252, "learning_rate": 6.937460558426091e-06, "loss": 0.8335, "step": 256620 }, { "epoch": 4.505521515476044, "grad_norm": 0.04909761436415814, "learning_rate": 6.93398585337803e-06, "loss": 0.8379, "step": 256630 }, { "epoch": 4.505697080356046, "grad_norm": 0.06106570454410188, "learning_rate": 6.930512340164416e-06, "loss": 0.8418, "step": 256640 }, { "epoch": 4.505872645236047, "grad_norm": 0.051847763459794756, "learning_rate": 6.927040018829215e-06, "loss": 0.8376, "step": 256650 }, { "epoch": 4.506048210116049, "grad_norm": 0.07496532487970543, "learning_rate": 6.923568889416468e-06, "loss": 0.8331, "step": 256660 }, { "epoch": 4.50622377499605, "grad_norm": 0.05375275256876201, "learning_rate": 6.920098951970116e-06, "loss": 0.8343, "step": 256670 }, { "epoch": 4.506399339876051, "grad_norm": 0.040282080953007, "learning_rate": 6.916630206534147e-06, "loss": 0.843, "step": 256680 }, { "epoch": 4.506574904756053, "grad_norm": 0.0640866204213749, "learning_rate": 6.913162653152516e-06, "loss": 0.8301, "step": 256690 }, { "epoch": 4.506750469636054, "grad_norm": 0.05039941170067501, "learning_rate": 6.909696291869133e-06, "loss": 0.8329, "step": 256700 }, { "epoch": 4.506926034516056, "grad_norm": 0.05537971561636846, "learning_rate": 6.906231122727938e-06, "loss": 0.8358, "step": 256710 }, { "epoch": 4.507101599396057, "grad_norm": 0.058537759614135446, "learning_rate": 6.9027671457728235e-06, "loss": 0.8365, "step": 256720 }, { "epoch": 4.507277164276058, "grad_norm": 0.053353671921219224, "learning_rate": 6.899304361047673e-06, "loss": 0.8367, "step": 256730 }, { "epoch": 4.50745272915606, "grad_norm": 0.052032759038481785, "learning_rate": 6.895842768596388e-06, "loss": 0.8426, "step": 256740 }, { "epoch": 4.507628294036061, "grad_norm": 0.040754222099338475, "learning_rate": 6.892382368462788e-06, "loss": 0.834, "step": 256750 }, { "epoch": 4.507803858916063, "grad_norm": 0.05564717563513395, "learning_rate": 6.888923160690763e-06, "loss": 0.8365, "step": 256760 }, { "epoch": 4.507979423796064, "grad_norm": 0.048164149708132, "learning_rate": 6.885465145324102e-06, "loss": 0.8438, "step": 256770 }, { "epoch": 4.508154988676065, "grad_norm": 0.050788744976370044, "learning_rate": 6.882008322406638e-06, "loss": 0.8448, "step": 256780 }, { "epoch": 4.508330553556067, "grad_norm": 0.04428173638226006, "learning_rate": 6.878552691982182e-06, "loss": 0.8333, "step": 256790 }, { "epoch": 4.508506118436068, "grad_norm": 0.054384271765745264, "learning_rate": 6.875098254094475e-06, "loss": 0.8356, "step": 256800 }, { "epoch": 4.50868168331607, "grad_norm": 0.053358887498011834, "learning_rate": 6.871645008787333e-06, "loss": 0.8394, "step": 256810 }, { "epoch": 4.508857248196071, "grad_norm": 0.05844411111161257, "learning_rate": 6.868192956104469e-06, "loss": 0.8358, "step": 256820 }, { "epoch": 4.509032813076073, "grad_norm": 0.047908378462212936, "learning_rate": 6.864742096089655e-06, "loss": 0.8476, "step": 256830 }, { "epoch": 4.509208377956074, "grad_norm": 0.0639151005872408, "learning_rate": 6.861292428786598e-06, "loss": 0.8332, "step": 256840 }, { "epoch": 4.509383942836075, "grad_norm": 0.04719328024580165, "learning_rate": 6.8578439542390045e-06, "loss": 0.8392, "step": 256850 }, { "epoch": 4.509559507716077, "grad_norm": 0.05185762621399299, "learning_rate": 6.854396672490579e-06, "loss": 0.8445, "step": 256860 }, { "epoch": 4.509735072596078, "grad_norm": 0.06711378173695523, "learning_rate": 6.85095058358498e-06, "loss": 0.8394, "step": 256870 }, { "epoch": 4.50991063747608, "grad_norm": 0.048119284824122976, "learning_rate": 6.847505687565894e-06, "loss": 0.8396, "step": 256880 }, { "epoch": 4.510086202356081, "grad_norm": 0.08937671928987448, "learning_rate": 6.844061984476935e-06, "loss": 0.8352, "step": 256890 }, { "epoch": 4.5102617672360825, "grad_norm": 0.057293764221349644, "learning_rate": 6.840619474361762e-06, "loss": 0.8403, "step": 256900 }, { "epoch": 4.5104373321160836, "grad_norm": 0.05570860406316803, "learning_rate": 6.83717815726401e-06, "loss": 0.8409, "step": 256910 }, { "epoch": 4.510612896996085, "grad_norm": 0.06601556289840636, "learning_rate": 6.83373803322721e-06, "loss": 0.832, "step": 256920 }, { "epoch": 4.5107884618760865, "grad_norm": 0.05055567197684911, "learning_rate": 6.830299102295038e-06, "loss": 0.8381, "step": 256930 }, { "epoch": 4.5109640267560875, "grad_norm": 0.043570886346580476, "learning_rate": 6.826861364511003e-06, "loss": 0.8344, "step": 256940 }, { "epoch": 4.5111395916360895, "grad_norm": 0.05829616468258409, "learning_rate": 6.823424819918697e-06, "loss": 0.8322, "step": 256950 }, { "epoch": 4.5113151565160905, "grad_norm": 0.05266682140643785, "learning_rate": 6.819989468561636e-06, "loss": 0.8356, "step": 256960 }, { "epoch": 4.5114907213960915, "grad_norm": 0.057372281646934926, "learning_rate": 6.816555310483353e-06, "loss": 0.8404, "step": 256970 }, { "epoch": 4.5116662862760935, "grad_norm": 0.054823612973248656, "learning_rate": 6.813122345727388e-06, "loss": 0.8333, "step": 256980 }, { "epoch": 4.5118418511560945, "grad_norm": 0.0475331810358645, "learning_rate": 6.809690574337174e-06, "loss": 0.8368, "step": 256990 }, { "epoch": 4.512017416036096, "grad_norm": 0.06620111258697498, "learning_rate": 6.806259996356274e-06, "loss": 0.8312, "step": 257000 }, { "epoch": 4.512192980916097, "grad_norm": 0.042129526829830316, "learning_rate": 6.802830611828087e-06, "loss": 0.8352, "step": 257010 }, { "epoch": 4.5123685457960985, "grad_norm": 0.047385438993787124, "learning_rate": 6.799402420796086e-06, "loss": 0.8374, "step": 257020 }, { "epoch": 4.5125441106761, "grad_norm": 0.05635656122161265, "learning_rate": 6.795975423303733e-06, "loss": 0.8343, "step": 257030 }, { "epoch": 4.512719675556101, "grad_norm": 0.05219243525502161, "learning_rate": 6.792549619394403e-06, "loss": 0.8405, "step": 257040 }, { "epoch": 4.512895240436103, "grad_norm": 0.05184508716211594, "learning_rate": 6.789125009111541e-06, "loss": 0.8398, "step": 257050 }, { "epoch": 4.513070805316104, "grad_norm": 0.05786951720183779, "learning_rate": 6.785701592498514e-06, "loss": 0.8378, "step": 257060 }, { "epoch": 4.513246370196106, "grad_norm": 0.05431527187184887, "learning_rate": 6.782279369598694e-06, "loss": 0.8372, "step": 257070 }, { "epoch": 4.513421935076107, "grad_norm": 0.05769669503440229, "learning_rate": 6.7788583404554755e-06, "loss": 0.8386, "step": 257080 }, { "epoch": 4.513597499956109, "grad_norm": 0.05214133555965386, "learning_rate": 6.77543850511214e-06, "loss": 0.8283, "step": 257090 }, { "epoch": 4.51377306483611, "grad_norm": 0.06514690304522829, "learning_rate": 6.772019863612093e-06, "loss": 0.8357, "step": 257100 }, { "epoch": 4.513948629716111, "grad_norm": 0.06964475488286119, "learning_rate": 6.768602415998596e-06, "loss": 0.8387, "step": 257110 }, { "epoch": 4.514124194596113, "grad_norm": 0.05325422971143329, "learning_rate": 6.7651861623149764e-06, "loss": 0.8439, "step": 257120 }, { "epoch": 4.514299759476114, "grad_norm": 0.04369324085136749, "learning_rate": 6.761771102604505e-06, "loss": 0.8385, "step": 257130 }, { "epoch": 4.514475324356116, "grad_norm": 0.059455090352139535, "learning_rate": 6.758357236910445e-06, "loss": 0.8349, "step": 257140 }, { "epoch": 4.514650889236117, "grad_norm": 0.04542073094222972, "learning_rate": 6.754944565276078e-06, "loss": 0.8426, "step": 257150 }, { "epoch": 4.514826454116118, "grad_norm": 0.044609823499298075, "learning_rate": 6.751533087744591e-06, "loss": 0.8345, "step": 257160 }, { "epoch": 4.51500201899612, "grad_norm": 0.05954310838409795, "learning_rate": 6.7481228043592774e-06, "loss": 0.8417, "step": 257170 }, { "epoch": 4.515177583876121, "grad_norm": 0.049744313113648755, "learning_rate": 6.744713715163298e-06, "loss": 0.8294, "step": 257180 }, { "epoch": 4.515353148756123, "grad_norm": 0.042360209957226946, "learning_rate": 6.741305820199849e-06, "loss": 0.84, "step": 257190 }, { "epoch": 4.515528713636124, "grad_norm": 0.0737666854592852, "learning_rate": 6.737899119512138e-06, "loss": 0.8461, "step": 257200 }, { "epoch": 4.515704278516125, "grad_norm": 0.05296903038627075, "learning_rate": 6.734493613143305e-06, "loss": 0.8434, "step": 257210 }, { "epoch": 4.515879843396127, "grad_norm": 0.058798247421625725, "learning_rate": 6.731089301136512e-06, "loss": 0.8371, "step": 257220 }, { "epoch": 4.516055408276128, "grad_norm": 0.052831977675073284, "learning_rate": 6.727686183534868e-06, "loss": 0.8308, "step": 257230 }, { "epoch": 4.51623097315613, "grad_norm": 0.054728737212779315, "learning_rate": 6.724284260381511e-06, "loss": 0.832, "step": 257240 }, { "epoch": 4.516406538036131, "grad_norm": 0.0413277229994161, "learning_rate": 6.720883531719549e-06, "loss": 0.8385, "step": 257250 }, { "epoch": 4.516582102916132, "grad_norm": 0.05875599614554673, "learning_rate": 6.717483997592047e-06, "loss": 0.8375, "step": 257260 }, { "epoch": 4.516757667796134, "grad_norm": 0.05219769531794767, "learning_rate": 6.714085658042102e-06, "loss": 0.8325, "step": 257270 }, { "epoch": 4.516933232676135, "grad_norm": 0.0520128095188777, "learning_rate": 6.710688513112765e-06, "loss": 0.8312, "step": 257280 }, { "epoch": 4.517108797556137, "grad_norm": 0.047658410494105394, "learning_rate": 6.707292562847075e-06, "loss": 0.8398, "step": 257290 }, { "epoch": 4.517284362436138, "grad_norm": 0.05889772302999428, "learning_rate": 6.7038978072880485e-06, "loss": 0.8409, "step": 257300 }, { "epoch": 4.51745992731614, "grad_norm": 0.060531284547080136, "learning_rate": 6.700504246478709e-06, "loss": 0.8405, "step": 257310 }, { "epoch": 4.517635492196141, "grad_norm": 0.05879089860924705, "learning_rate": 6.697111880462066e-06, "loss": 0.8352, "step": 257320 }, { "epoch": 4.517811057076143, "grad_norm": 0.04611361438385633, "learning_rate": 6.693720709281065e-06, "loss": 0.8361, "step": 257330 }, { "epoch": 4.517986621956144, "grad_norm": 0.07308626006044867, "learning_rate": 6.690330732978724e-06, "loss": 0.8379, "step": 257340 }, { "epoch": 4.518162186836145, "grad_norm": 0.049087449113620396, "learning_rate": 6.6869419515979524e-06, "loss": 0.8416, "step": 257350 }, { "epoch": 4.518337751716147, "grad_norm": 0.05071235587671142, "learning_rate": 6.683554365181693e-06, "loss": 0.8336, "step": 257360 }, { "epoch": 4.518513316596148, "grad_norm": 0.06349673123232821, "learning_rate": 6.6801679737728995e-06, "loss": 0.8349, "step": 257370 }, { "epoch": 4.51868888147615, "grad_norm": 0.05067338666005322, "learning_rate": 6.676782777414436e-06, "loss": 0.8411, "step": 257380 }, { "epoch": 4.518864446356151, "grad_norm": 0.05031431390344126, "learning_rate": 6.673398776149229e-06, "loss": 0.8428, "step": 257390 }, { "epoch": 4.519040011236152, "grad_norm": 0.05434962535691771, "learning_rate": 6.670015970020116e-06, "loss": 0.8309, "step": 257400 }, { "epoch": 4.519215576116154, "grad_norm": 0.05098508186924281, "learning_rate": 6.666634359069986e-06, "loss": 0.8328, "step": 257410 }, { "epoch": 4.519391140996155, "grad_norm": 0.04973129032069178, "learning_rate": 6.663253943341694e-06, "loss": 0.8416, "step": 257420 }, { "epoch": 4.519566705876157, "grad_norm": 0.04738427604911563, "learning_rate": 6.659874722878021e-06, "loss": 0.8375, "step": 257430 }, { "epoch": 4.519742270756158, "grad_norm": 0.06336267220885693, "learning_rate": 6.656496697721838e-06, "loss": 0.8345, "step": 257440 }, { "epoch": 4.519917835636159, "grad_norm": 0.056824047372132425, "learning_rate": 6.653119867915916e-06, "loss": 0.8355, "step": 257450 }, { "epoch": 4.520093400516161, "grad_norm": 0.05531923979808199, "learning_rate": 6.649744233503065e-06, "loss": 0.8373, "step": 257460 }, { "epoch": 4.520268965396162, "grad_norm": 0.052778704760012944, "learning_rate": 6.646369794526018e-06, "loss": 0.831, "step": 257470 }, { "epoch": 4.520444530276164, "grad_norm": 0.04843023038586308, "learning_rate": 6.642996551027542e-06, "loss": 0.8305, "step": 257480 }, { "epoch": 4.520620095156165, "grad_norm": 0.0486834365878468, "learning_rate": 6.639624503050404e-06, "loss": 0.8457, "step": 257490 }, { "epoch": 4.520795660036167, "grad_norm": 0.0461806464361591, "learning_rate": 6.6362536506372835e-06, "loss": 0.8382, "step": 257500 }, { "epoch": 4.520971224916168, "grad_norm": 0.07134296950141081, "learning_rate": 6.632883993830935e-06, "loss": 0.8411, "step": 257510 }, { "epoch": 4.521146789796169, "grad_norm": 0.05695764214658916, "learning_rate": 6.629515532674016e-06, "loss": 0.836, "step": 257520 }, { "epoch": 4.521322354676171, "grad_norm": 0.045295798499162425, "learning_rate": 6.6261482672092254e-06, "loss": 0.8444, "step": 257530 }, { "epoch": 4.521497919556172, "grad_norm": 0.0555792302044756, "learning_rate": 6.622782197479235e-06, "loss": 0.8404, "step": 257540 }, { "epoch": 4.521673484436174, "grad_norm": 0.04968261866294143, "learning_rate": 6.619417323526666e-06, "loss": 0.8338, "step": 257550 }, { "epoch": 4.521849049316175, "grad_norm": 0.046881744731230926, "learning_rate": 6.616053645394188e-06, "loss": 0.837, "step": 257560 }, { "epoch": 4.522024614196177, "grad_norm": 0.044770261223333614, "learning_rate": 6.61269116312438e-06, "loss": 0.8424, "step": 257570 }, { "epoch": 4.522200179076178, "grad_norm": 0.04595529214594434, "learning_rate": 6.609329876759865e-06, "loss": 0.8418, "step": 257580 }, { "epoch": 4.522375743956179, "grad_norm": 0.048547534646282905, "learning_rate": 6.605969786343245e-06, "loss": 0.838, "step": 257590 }, { "epoch": 4.522551308836181, "grad_norm": 0.05031621094665321, "learning_rate": 6.602610891917047e-06, "loss": 0.8413, "step": 257600 }, { "epoch": 4.522726873716182, "grad_norm": 0.06899088035946838, "learning_rate": 6.599253193523894e-06, "loss": 0.8364, "step": 257610 }, { "epoch": 4.522902438596184, "grad_norm": 0.06301881039037725, "learning_rate": 6.595896691206275e-06, "loss": 0.837, "step": 257620 }, { "epoch": 4.523078003476185, "grad_norm": 0.06519954251667955, "learning_rate": 6.592541385006754e-06, "loss": 0.8245, "step": 257630 }, { "epoch": 4.523253568356186, "grad_norm": 0.04488714594150446, "learning_rate": 6.589187274967817e-06, "loss": 0.826, "step": 257640 }, { "epoch": 4.523429133236188, "grad_norm": 0.06303144160185384, "learning_rate": 6.585834361131959e-06, "loss": 0.8353, "step": 257650 }, { "epoch": 4.523604698116189, "grad_norm": 0.059338296901475206, "learning_rate": 6.5824826435417054e-06, "loss": 0.8351, "step": 257660 }, { "epoch": 4.5237802629961905, "grad_norm": 0.05235425426686765, "learning_rate": 6.579132122239448e-06, "loss": 0.8343, "step": 257670 }, { "epoch": 4.5239558278761915, "grad_norm": 0.049108009296135166, "learning_rate": 6.575782797267724e-06, "loss": 0.8391, "step": 257680 }, { "epoch": 4.524131392756193, "grad_norm": 0.06121734928926586, "learning_rate": 6.572434668668903e-06, "loss": 0.8347, "step": 257690 }, { "epoch": 4.5243069576361945, "grad_norm": 0.05195754765625715, "learning_rate": 6.569087736485433e-06, "loss": 0.8455, "step": 257700 }, { "epoch": 4.5244825225161955, "grad_norm": 0.05128502220680411, "learning_rate": 6.5657420007597305e-06, "loss": 0.8335, "step": 257710 }, { "epoch": 4.5246580873961975, "grad_norm": 0.04564897239077919, "learning_rate": 6.562397461534167e-06, "loss": 0.8396, "step": 257720 }, { "epoch": 4.5248336522761985, "grad_norm": 0.06327999605675556, "learning_rate": 6.559054118851143e-06, "loss": 0.8398, "step": 257730 }, { "epoch": 4.5250092171562, "grad_norm": 0.051049478144168936, "learning_rate": 6.5557119727529915e-06, "loss": 0.8455, "step": 257740 }, { "epoch": 4.5251847820362014, "grad_norm": 0.04493944649687119, "learning_rate": 6.552371023282056e-06, "loss": 0.8351, "step": 257750 }, { "epoch": 4.525360346916203, "grad_norm": 0.06298421911376387, "learning_rate": 6.5490312704807e-06, "loss": 0.8376, "step": 257760 }, { "epoch": 4.525535911796204, "grad_norm": 0.04707535355462161, "learning_rate": 6.545692714391194e-06, "loss": 0.8394, "step": 257770 }, { "epoch": 4.525711476676205, "grad_norm": 0.0506834229274037, "learning_rate": 6.5423553550558984e-06, "loss": 0.8409, "step": 257780 }, { "epoch": 4.525887041556207, "grad_norm": 0.05177421850613353, "learning_rate": 6.539019192517043e-06, "loss": 0.8373, "step": 257790 }, { "epoch": 4.526062606436208, "grad_norm": 0.05364285231899406, "learning_rate": 6.535684226816921e-06, "loss": 0.8381, "step": 257800 }, { "epoch": 4.52623817131621, "grad_norm": 0.07329205593110029, "learning_rate": 6.532350457997781e-06, "loss": 0.8386, "step": 257810 }, { "epoch": 4.526413736196211, "grad_norm": 0.04143325726331699, "learning_rate": 6.529017886101864e-06, "loss": 0.8373, "step": 257820 }, { "epoch": 4.526589301076212, "grad_norm": 0.05623834207623918, "learning_rate": 6.525686511171408e-06, "loss": 0.8395, "step": 257830 }, { "epoch": 4.526764865956214, "grad_norm": 0.06893938801104793, "learning_rate": 6.522356333248588e-06, "loss": 0.8403, "step": 257840 }, { "epoch": 4.526940430836215, "grad_norm": 0.050634238695703965, "learning_rate": 6.519027352375642e-06, "loss": 0.8361, "step": 257850 }, { "epoch": 4.527115995716217, "grad_norm": 0.06811512578099445, "learning_rate": 6.5156995685947106e-06, "loss": 0.83, "step": 257860 }, { "epoch": 4.527291560596218, "grad_norm": 0.07703054372732084, "learning_rate": 6.512372981947979e-06, "loss": 0.8453, "step": 257870 }, { "epoch": 4.527467125476219, "grad_norm": 0.05006134035127573, "learning_rate": 6.509047592477595e-06, "loss": 0.8454, "step": 257880 }, { "epoch": 4.527642690356221, "grad_norm": 0.054204231354204935, "learning_rate": 6.50572340022568e-06, "loss": 0.8399, "step": 257890 }, { "epoch": 4.527818255236222, "grad_norm": 0.05956319179409437, "learning_rate": 6.502400405234374e-06, "loss": 0.8427, "step": 257900 }, { "epoch": 4.527993820116224, "grad_norm": 0.07473085770623586, "learning_rate": 6.499078607545753e-06, "loss": 0.8283, "step": 257910 }, { "epoch": 4.528169384996225, "grad_norm": 0.054851272385107806, "learning_rate": 6.495758007201919e-06, "loss": 0.8403, "step": 257920 }, { "epoch": 4.528344949876226, "grad_norm": 0.047839352727130906, "learning_rate": 6.492438604244952e-06, "loss": 0.8384, "step": 257930 }, { "epoch": 4.528520514756228, "grad_norm": 0.05158213641647173, "learning_rate": 6.489120398716869e-06, "loss": 0.8369, "step": 257940 }, { "epoch": 4.528696079636229, "grad_norm": 0.055634069980786634, "learning_rate": 6.485803390659776e-06, "loss": 0.8427, "step": 257950 }, { "epoch": 4.528871644516231, "grad_norm": 0.051049315460870985, "learning_rate": 6.482487580115651e-06, "loss": 0.8378, "step": 257960 }, { "epoch": 4.529047209396232, "grad_norm": 0.03956171145846347, "learning_rate": 6.479172967126533e-06, "loss": 0.8385, "step": 257970 }, { "epoch": 4.529222774276234, "grad_norm": 0.056459770295316174, "learning_rate": 6.475859551734399e-06, "loss": 0.839, "step": 257980 }, { "epoch": 4.529398339156235, "grad_norm": 0.049409455084437205, "learning_rate": 6.472547333981246e-06, "loss": 0.8374, "step": 257990 }, { "epoch": 4.529573904036237, "grad_norm": 0.05478855116889959, "learning_rate": 6.469236313909038e-06, "loss": 0.8258, "step": 258000 }, { "epoch": 4.529749468916238, "grad_norm": 0.04112849010977086, "learning_rate": 6.465926491559697e-06, "loss": 0.8328, "step": 258010 }, { "epoch": 4.529925033796239, "grad_norm": 0.044113158199913245, "learning_rate": 6.4626178669752195e-06, "loss": 0.8379, "step": 258020 }, { "epoch": 4.530100598676241, "grad_norm": 0.05582904087357936, "learning_rate": 6.4593104401974695e-06, "loss": 0.8452, "step": 258030 }, { "epoch": 4.530276163556242, "grad_norm": 0.061652707091803004, "learning_rate": 6.4560042112683805e-06, "loss": 0.8432, "step": 258040 }, { "epoch": 4.530451728436244, "grad_norm": 0.05191184298141779, "learning_rate": 6.45269918022985e-06, "loss": 0.8338, "step": 258050 }, { "epoch": 4.530627293316245, "grad_norm": 0.04274364686212786, "learning_rate": 6.449395347123721e-06, "loss": 0.8474, "step": 258060 }, { "epoch": 4.530802858196246, "grad_norm": 0.06093273660454719, "learning_rate": 6.446092711991894e-06, "loss": 0.8365, "step": 258070 }, { "epoch": 4.530978423076248, "grad_norm": 0.06526108097574959, "learning_rate": 6.442791274876178e-06, "loss": 0.8318, "step": 258080 }, { "epoch": 4.531153987956249, "grad_norm": 0.043380871103582214, "learning_rate": 6.439491035818418e-06, "loss": 0.8306, "step": 258090 }, { "epoch": 4.531329552836251, "grad_norm": 0.046505622584656986, "learning_rate": 6.436191994860444e-06, "loss": 0.8407, "step": 258100 }, { "epoch": 4.531505117716252, "grad_norm": 0.06484022361763504, "learning_rate": 6.4328941520440125e-06, "loss": 0.8404, "step": 258110 }, { "epoch": 4.531680682596253, "grad_norm": 0.056458781243714985, "learning_rate": 6.429597507410957e-06, "loss": 0.8475, "step": 258120 }, { "epoch": 4.531856247476255, "grad_norm": 0.05132554904543629, "learning_rate": 6.426302061003023e-06, "loss": 0.8373, "step": 258130 }, { "epoch": 4.532031812356256, "grad_norm": 0.051470419299153615, "learning_rate": 6.423007812861972e-06, "loss": 0.8386, "step": 258140 }, { "epoch": 4.532207377236258, "grad_norm": 0.04699862189222076, "learning_rate": 6.419714763029534e-06, "loss": 0.8395, "step": 258150 }, { "epoch": 4.532382942116259, "grad_norm": 0.0524439446787785, "learning_rate": 6.416422911547426e-06, "loss": 0.8403, "step": 258160 }, { "epoch": 4.532558506996261, "grad_norm": 0.06308370229325212, "learning_rate": 6.4131322584573855e-06, "loss": 0.8374, "step": 258170 }, { "epoch": 4.532734071876262, "grad_norm": 0.04442638358957028, "learning_rate": 6.4098428038010555e-06, "loss": 0.8459, "step": 258180 }, { "epoch": 4.532909636756263, "grad_norm": 0.05781057244538682, "learning_rate": 6.40655454762017e-06, "loss": 0.8412, "step": 258190 }, { "epoch": 4.533085201636265, "grad_norm": 0.055437823976226194, "learning_rate": 6.403267489956363e-06, "loss": 0.8394, "step": 258200 }, { "epoch": 4.533260766516266, "grad_norm": 0.04644450546432996, "learning_rate": 6.399981630851283e-06, "loss": 0.8352, "step": 258210 }, { "epoch": 4.533436331396268, "grad_norm": 0.05023993562057638, "learning_rate": 6.3966969703465744e-06, "loss": 0.8415, "step": 258220 }, { "epoch": 4.533611896276269, "grad_norm": 0.055741658363992364, "learning_rate": 6.393413508483836e-06, "loss": 0.8446, "step": 258230 }, { "epoch": 4.533787461156271, "grad_norm": 0.04910165263183902, "learning_rate": 6.390131245304695e-06, "loss": 0.8391, "step": 258240 }, { "epoch": 4.533963026036272, "grad_norm": 0.07297516685599904, "learning_rate": 6.386850180850698e-06, "loss": 0.8384, "step": 258250 }, { "epoch": 4.534138590916273, "grad_norm": 0.06158198495685985, "learning_rate": 6.383570315163456e-06, "loss": 0.832, "step": 258260 }, { "epoch": 4.534314155796275, "grad_norm": 0.04739337567926968, "learning_rate": 6.3802916482845174e-06, "loss": 0.8398, "step": 258270 }, { "epoch": 4.534489720676276, "grad_norm": 0.04644365541254171, "learning_rate": 6.377014180255394e-06, "loss": 0.8362, "step": 258280 }, { "epoch": 4.534665285556278, "grad_norm": 0.04847315748847859, "learning_rate": 6.3737379111176584e-06, "loss": 0.8306, "step": 258290 }, { "epoch": 4.534840850436279, "grad_norm": 0.052753523855847195, "learning_rate": 6.370462840912797e-06, "loss": 0.8368, "step": 258300 }, { "epoch": 4.53501641531628, "grad_norm": 0.06872268249568514, "learning_rate": 6.367188969682305e-06, "loss": 0.8426, "step": 258310 }, { "epoch": 4.535191980196282, "grad_norm": 0.05190077769447328, "learning_rate": 6.363916297467665e-06, "loss": 0.8288, "step": 258320 }, { "epoch": 4.535367545076283, "grad_norm": 0.05659877477786072, "learning_rate": 6.36064482431033e-06, "loss": 0.8486, "step": 258330 }, { "epoch": 4.535543109956285, "grad_norm": 0.05489044538146064, "learning_rate": 6.357374550251775e-06, "loss": 0.8397, "step": 258340 }, { "epoch": 4.535718674836286, "grad_norm": 0.042737012831216434, "learning_rate": 6.354105475333414e-06, "loss": 0.8429, "step": 258350 }, { "epoch": 4.535894239716287, "grad_norm": 0.051165063547542515, "learning_rate": 6.350837599596682e-06, "loss": 0.8382, "step": 258360 }, { "epoch": 4.536069804596289, "grad_norm": 0.08014737001530298, "learning_rate": 6.347570923082971e-06, "loss": 0.838, "step": 258370 }, { "epoch": 4.53624536947629, "grad_norm": 0.07663645659132963, "learning_rate": 6.344305445833678e-06, "loss": 0.8436, "step": 258380 }, { "epoch": 4.536420934356292, "grad_norm": 0.05455254978203809, "learning_rate": 6.341041167890193e-06, "loss": 0.8325, "step": 258390 }, { "epoch": 4.536596499236293, "grad_norm": 0.05414107602183725, "learning_rate": 6.337778089293833e-06, "loss": 0.8362, "step": 258400 }, { "epoch": 4.5367720641162945, "grad_norm": 0.04424690985420161, "learning_rate": 6.33451621008599e-06, "loss": 0.842, "step": 258410 }, { "epoch": 4.536947628996296, "grad_norm": 0.055224547377774005, "learning_rate": 6.331255530307958e-06, "loss": 0.8381, "step": 258420 }, { "epoch": 4.5371231938762975, "grad_norm": 0.05006852295295286, "learning_rate": 6.327996050001056e-06, "loss": 0.8356, "step": 258430 }, { "epoch": 4.5372987587562985, "grad_norm": 0.05109983569842612, "learning_rate": 6.3247377692065954e-06, "loss": 0.8421, "step": 258440 }, { "epoch": 4.5374743236362995, "grad_norm": 0.04881460185788697, "learning_rate": 6.321480687965828e-06, "loss": 0.835, "step": 258450 }, { "epoch": 4.5376498885163015, "grad_norm": 0.05873756913646965, "learning_rate": 6.3182248063200805e-06, "loss": 0.8382, "step": 258460 }, { "epoch": 4.5378254533963025, "grad_norm": 0.053844294816562825, "learning_rate": 6.31497012431055e-06, "loss": 0.8405, "step": 258470 }, { "epoch": 4.538001018276304, "grad_norm": 0.04919332965110138, "learning_rate": 6.311716641978506e-06, "loss": 0.8384, "step": 258480 }, { "epoch": 4.5381765831563055, "grad_norm": 0.05179570062971542, "learning_rate": 6.308464359365154e-06, "loss": 0.843, "step": 258490 }, { "epoch": 4.5383521480363065, "grad_norm": 0.04434693391872347, "learning_rate": 6.305213276511704e-06, "loss": 0.8312, "step": 258500 }, { "epoch": 4.538527712916308, "grad_norm": 0.05076746840655468, "learning_rate": 6.3019633934593575e-06, "loss": 0.837, "step": 258510 }, { "epoch": 4.538703277796309, "grad_norm": 0.04870904003935724, "learning_rate": 6.298714710249258e-06, "loss": 0.8382, "step": 258520 }, { "epoch": 4.538878842676311, "grad_norm": 0.06668367478618586, "learning_rate": 6.29546722692262e-06, "loss": 0.8372, "step": 258530 }, { "epoch": 4.539054407556312, "grad_norm": 0.053036512408864867, "learning_rate": 6.292220943520552e-06, "loss": 0.8407, "step": 258540 }, { "epoch": 4.539229972436313, "grad_norm": 0.08281141935591595, "learning_rate": 6.2889758600841954e-06, "loss": 0.8346, "step": 258550 }, { "epoch": 4.539405537316315, "grad_norm": 0.04809843195497083, "learning_rate": 6.285731976654674e-06, "loss": 0.841, "step": 258560 }, { "epoch": 4.539581102196316, "grad_norm": 0.05392355154352116, "learning_rate": 6.2824892932730695e-06, "loss": 0.8352, "step": 258570 }, { "epoch": 4.539756667076318, "grad_norm": 0.04770645568317571, "learning_rate": 6.279247809980493e-06, "loss": 0.8357, "step": 258580 }, { "epoch": 4.539932231956319, "grad_norm": 0.05531420595169237, "learning_rate": 6.276007526817991e-06, "loss": 0.8393, "step": 258590 }, { "epoch": 4.54010779683632, "grad_norm": 0.04882301701710628, "learning_rate": 6.272768443826613e-06, "loss": 0.8412, "step": 258600 }, { "epoch": 4.540283361716322, "grad_norm": 0.06346319805107963, "learning_rate": 6.269530561047444e-06, "loss": 0.8347, "step": 258610 }, { "epoch": 4.540458926596323, "grad_norm": 0.044801562065619624, "learning_rate": 6.266293878521438e-06, "loss": 0.8412, "step": 258620 }, { "epoch": 4.540634491476325, "grad_norm": 0.04919694380774686, "learning_rate": 6.263058396289681e-06, "loss": 0.8437, "step": 258630 }, { "epoch": 4.540810056356326, "grad_norm": 0.047283620493168206, "learning_rate": 6.259824114393102e-06, "loss": 0.8431, "step": 258640 }, { "epoch": 4.540985621236328, "grad_norm": 0.05516595888685987, "learning_rate": 6.256591032872733e-06, "loss": 0.8414, "step": 258650 }, { "epoch": 4.541161186116329, "grad_norm": 0.05716566974476635, "learning_rate": 6.253359151769506e-06, "loss": 0.8416, "step": 258660 }, { "epoch": 4.541336750996331, "grad_norm": 0.055147490383936966, "learning_rate": 6.25012847112436e-06, "loss": 0.8393, "step": 258670 }, { "epoch": 4.541512315876332, "grad_norm": 0.0611267836572989, "learning_rate": 6.2468989909782724e-06, "loss": 0.8378, "step": 258680 }, { "epoch": 4.541687880756333, "grad_norm": 0.0690722910826504, "learning_rate": 6.2436707113720975e-06, "loss": 0.8395, "step": 258690 }, { "epoch": 4.541863445636335, "grad_norm": 0.04783580549515059, "learning_rate": 6.24044363234681e-06, "loss": 0.8357, "step": 258700 }, { "epoch": 4.542039010516336, "grad_norm": 0.04892832847340393, "learning_rate": 6.237217753943244e-06, "loss": 0.8407, "step": 258710 }, { "epoch": 4.542214575396338, "grad_norm": 0.05739701599485136, "learning_rate": 6.233993076202295e-06, "loss": 0.8416, "step": 258720 }, { "epoch": 4.542390140276339, "grad_norm": 0.04997391924325357, "learning_rate": 6.230769599164818e-06, "loss": 0.8467, "step": 258730 }, { "epoch": 4.54256570515634, "grad_norm": 0.056242343897485676, "learning_rate": 6.227547322871647e-06, "loss": 0.8379, "step": 258740 }, { "epoch": 4.542741270036342, "grad_norm": 0.04747605977614506, "learning_rate": 6.224326247363625e-06, "loss": 0.8403, "step": 258750 }, { "epoch": 4.542916834916343, "grad_norm": 0.06948122107070676, "learning_rate": 6.221106372681549e-06, "loss": 0.8312, "step": 258760 }, { "epoch": 4.543092399796345, "grad_norm": 0.058444218659340345, "learning_rate": 6.217887698866211e-06, "loss": 0.8333, "step": 258770 }, { "epoch": 4.543267964676346, "grad_norm": 0.06803311508451354, "learning_rate": 6.2146702259584185e-06, "loss": 0.8375, "step": 258780 }, { "epoch": 4.543443529556347, "grad_norm": 0.06047064063841801, "learning_rate": 6.211453953998883e-06, "loss": 0.841, "step": 258790 }, { "epoch": 4.543619094436349, "grad_norm": 0.04695808763661924, "learning_rate": 6.208238883028428e-06, "loss": 0.844, "step": 258800 }, { "epoch": 4.54379465931635, "grad_norm": 0.04642524793803961, "learning_rate": 6.205025013087741e-06, "loss": 0.8386, "step": 258810 }, { "epoch": 4.543970224196352, "grad_norm": 0.05048710834300065, "learning_rate": 6.201812344217556e-06, "loss": 0.8373, "step": 258820 }, { "epoch": 4.544145789076353, "grad_norm": 0.06689854134867647, "learning_rate": 6.198600876458573e-06, "loss": 0.8376, "step": 258830 }, { "epoch": 4.544321353956355, "grad_norm": 0.07432313989923793, "learning_rate": 6.195390609851483e-06, "loss": 0.8314, "step": 258840 }, { "epoch": 4.544496918836356, "grad_norm": 0.05006862727867589, "learning_rate": 6.192181544436985e-06, "loss": 0.8283, "step": 258850 }, { "epoch": 4.544672483716357, "grad_norm": 0.05383384917774438, "learning_rate": 6.188973680255681e-06, "loss": 0.8287, "step": 258860 }, { "epoch": 4.544848048596359, "grad_norm": 0.04598121662673226, "learning_rate": 6.1857670173482816e-06, "loss": 0.8396, "step": 258870 }, { "epoch": 4.54502361347636, "grad_norm": 0.05670985155003879, "learning_rate": 6.182561555755368e-06, "loss": 0.8497, "step": 258880 }, { "epoch": 4.545199178356362, "grad_norm": 0.04642859242453754, "learning_rate": 6.179357295517574e-06, "loss": 0.8347, "step": 258890 }, { "epoch": 4.545374743236363, "grad_norm": 0.051149482097598196, "learning_rate": 6.1761542366755e-06, "loss": 0.8368, "step": 258900 }, { "epoch": 4.545550308116365, "grad_norm": 0.055800170586550076, "learning_rate": 6.172952379269717e-06, "loss": 0.8305, "step": 258910 }, { "epoch": 4.545725872996366, "grad_norm": 0.05238397160071007, "learning_rate": 6.1697517233408224e-06, "loss": 0.8341, "step": 258920 }, { "epoch": 4.545901437876367, "grad_norm": 0.26125304684616146, "learning_rate": 6.166552268929323e-06, "loss": 0.8406, "step": 258930 }, { "epoch": 4.546077002756369, "grad_norm": 0.03939678976118692, "learning_rate": 6.163354016075785e-06, "loss": 0.8416, "step": 258940 }, { "epoch": 4.54625256763637, "grad_norm": 0.046159556229806484, "learning_rate": 6.160156964820746e-06, "loss": 0.8416, "step": 258950 }, { "epoch": 4.546428132516372, "grad_norm": 0.05117020138329338, "learning_rate": 6.156961115204663e-06, "loss": 0.8406, "step": 258960 }, { "epoch": 4.546603697396373, "grad_norm": 0.0476919549280704, "learning_rate": 6.153766467268082e-06, "loss": 0.8322, "step": 258970 }, { "epoch": 4.546779262276374, "grad_norm": 0.0551141991437599, "learning_rate": 6.150573021051452e-06, "loss": 0.8436, "step": 258980 }, { "epoch": 4.546954827156376, "grad_norm": 0.05273284877743412, "learning_rate": 6.147380776595242e-06, "loss": 0.8416, "step": 258990 }, { "epoch": 4.547130392036377, "grad_norm": 0.05250306184166529, "learning_rate": 6.144189733939889e-06, "loss": 0.8396, "step": 259000 }, { "epoch": 4.547305956916379, "grad_norm": 0.050003757141442666, "learning_rate": 6.14099989312583e-06, "loss": 0.8336, "step": 259010 }, { "epoch": 4.54748152179638, "grad_norm": 0.04818139439068836, "learning_rate": 6.137811254193501e-06, "loss": 0.839, "step": 259020 }, { "epoch": 4.547657086676381, "grad_norm": 0.05489535911456474, "learning_rate": 6.13462381718325e-06, "loss": 0.8309, "step": 259030 }, { "epoch": 4.547832651556383, "grad_norm": 0.05508860550218829, "learning_rate": 6.1314375821355266e-06, "loss": 0.8407, "step": 259040 }, { "epoch": 4.548008216436384, "grad_norm": 0.045802800394279364, "learning_rate": 6.128252549090655e-06, "loss": 0.8348, "step": 259050 }, { "epoch": 4.548183781316386, "grad_norm": 0.05259928073489056, "learning_rate": 6.125068718089008e-06, "loss": 0.8351, "step": 259060 }, { "epoch": 4.548359346196387, "grad_norm": 0.048405345211736846, "learning_rate": 6.121886089170934e-06, "loss": 0.84, "step": 259070 }, { "epoch": 4.548534911076389, "grad_norm": 0.04746045558550393, "learning_rate": 6.118704662376738e-06, "loss": 0.8383, "step": 259080 }, { "epoch": 4.54871047595639, "grad_norm": 0.05448188592320247, "learning_rate": 6.115524437746746e-06, "loss": 0.8368, "step": 259090 }, { "epoch": 4.548886040836392, "grad_norm": 0.04895468540030248, "learning_rate": 6.112345415321229e-06, "loss": 0.8343, "step": 259100 }, { "epoch": 4.549061605716393, "grad_norm": 0.04899103559052429, "learning_rate": 6.1091675951404814e-06, "loss": 0.8334, "step": 259110 }, { "epoch": 4.549237170596394, "grad_norm": 0.04968612384310643, "learning_rate": 6.105990977244788e-06, "loss": 0.8354, "step": 259120 }, { "epoch": 4.549412735476396, "grad_norm": 0.06716369289948639, "learning_rate": 6.102815561674341e-06, "loss": 0.8325, "step": 259130 }, { "epoch": 4.549588300356397, "grad_norm": 0.05579920746798574, "learning_rate": 6.099641348469435e-06, "loss": 0.8476, "step": 259140 }, { "epoch": 4.5497638652363985, "grad_norm": 0.0482599611702587, "learning_rate": 6.096468337670243e-06, "loss": 0.8375, "step": 259150 }, { "epoch": 4.5499394301164, "grad_norm": 0.05677830108563702, "learning_rate": 6.093296529317005e-06, "loss": 0.8429, "step": 259160 }, { "epoch": 4.550114994996401, "grad_norm": 0.04991339550410038, "learning_rate": 6.090125923449869e-06, "loss": 0.8366, "step": 259170 }, { "epoch": 4.5502905598764025, "grad_norm": 0.1234072788418804, "learning_rate": 6.086956520109033e-06, "loss": 0.8394, "step": 259180 }, { "epoch": 4.5504661247564036, "grad_norm": 0.05287114628757094, "learning_rate": 6.083788319334667e-06, "loss": 0.8328, "step": 259190 }, { "epoch": 4.5506416896364055, "grad_norm": 0.056840768136038645, "learning_rate": 6.080621321166858e-06, "loss": 0.8343, "step": 259200 }, { "epoch": 4.5508172545164065, "grad_norm": 0.043763598292643334, "learning_rate": 6.0774555256458e-06, "loss": 0.8396, "step": 259210 }, { "epoch": 4.5509928193964075, "grad_norm": 0.05245497926377255, "learning_rate": 6.074290932811557e-06, "loss": 0.8457, "step": 259220 }, { "epoch": 4.5511683842764095, "grad_norm": 0.050417519441191605, "learning_rate": 6.0711275427042346e-06, "loss": 0.8319, "step": 259230 }, { "epoch": 4.5513439491564105, "grad_norm": 0.07190467630787786, "learning_rate": 6.0679653553639395e-06, "loss": 0.8388, "step": 259240 }, { "epoch": 4.551519514036412, "grad_norm": 0.05276945564900768, "learning_rate": 6.064804370830703e-06, "loss": 0.8395, "step": 259250 }, { "epoch": 4.5516950789164135, "grad_norm": 0.0494674696048589, "learning_rate": 6.061644589144598e-06, "loss": 0.8343, "step": 259260 }, { "epoch": 4.5518706437964145, "grad_norm": 0.08310166028530845, "learning_rate": 6.058486010345645e-06, "loss": 0.8381, "step": 259270 }, { "epoch": 4.552046208676416, "grad_norm": 0.038095251486930146, "learning_rate": 6.0553286344738605e-06, "loss": 0.8434, "step": 259280 }, { "epoch": 4.552221773556417, "grad_norm": 0.05838608885369294, "learning_rate": 6.052172461569277e-06, "loss": 0.8437, "step": 259290 }, { "epoch": 4.552397338436419, "grad_norm": 0.04474900962449523, "learning_rate": 6.0490174916718335e-06, "loss": 0.8274, "step": 259300 }, { "epoch": 4.55257290331642, "grad_norm": 0.046745786412668563, "learning_rate": 6.045863724821575e-06, "loss": 0.8327, "step": 259310 }, { "epoch": 4.552748468196422, "grad_norm": 0.04869502906108674, "learning_rate": 6.042711161058396e-06, "loss": 0.8373, "step": 259320 }, { "epoch": 4.552924033076423, "grad_norm": 0.05234288238250763, "learning_rate": 6.0395598004222845e-06, "loss": 0.8384, "step": 259330 }, { "epoch": 4.553099597956425, "grad_norm": 0.05150865756307625, "learning_rate": 6.036409642953128e-06, "loss": 0.8458, "step": 259340 }, { "epoch": 4.553275162836426, "grad_norm": 0.055847420899608514, "learning_rate": 6.033260688690866e-06, "loss": 0.8378, "step": 259350 }, { "epoch": 4.553450727716427, "grad_norm": 0.05732461644864, "learning_rate": 6.0301129376754096e-06, "loss": 0.8342, "step": 259360 }, { "epoch": 4.553626292596429, "grad_norm": 0.04440734318248186, "learning_rate": 6.0269663899465915e-06, "loss": 0.8359, "step": 259370 }, { "epoch": 4.55380185747643, "grad_norm": 0.08849749081337754, "learning_rate": 6.0238210455443295e-06, "loss": 0.8394, "step": 259380 }, { "epoch": 4.553977422356432, "grad_norm": 0.05424606060154002, "learning_rate": 6.0206769045084585e-06, "loss": 0.8446, "step": 259390 }, { "epoch": 4.554152987236433, "grad_norm": 0.07015791844447881, "learning_rate": 6.017533966878808e-06, "loss": 0.8447, "step": 259400 }, { "epoch": 4.554328552116434, "grad_norm": 0.057619771915141976, "learning_rate": 6.014392232695212e-06, "loss": 0.8494, "step": 259410 }, { "epoch": 4.554504116996436, "grad_norm": 0.0474802928253836, "learning_rate": 6.011251701997468e-06, "loss": 0.8391, "step": 259420 }, { "epoch": 4.554679681876437, "grad_norm": 0.06028191374746356, "learning_rate": 6.008112374825377e-06, "loss": 0.8414, "step": 259430 }, { "epoch": 4.554855246756439, "grad_norm": 0.042899421382947826, "learning_rate": 6.004974251218693e-06, "loss": 0.8382, "step": 259440 }, { "epoch": 4.55503081163644, "grad_norm": 0.05489864772767699, "learning_rate": 6.001837331217194e-06, "loss": 0.8383, "step": 259450 }, { "epoch": 4.555206376516441, "grad_norm": 0.06755220005296364, "learning_rate": 5.998701614860635e-06, "loss": 0.832, "step": 259460 }, { "epoch": 4.555381941396443, "grad_norm": 0.08235307663685043, "learning_rate": 5.995567102188704e-06, "loss": 0.8373, "step": 259470 }, { "epoch": 4.555557506276444, "grad_norm": 0.05668591964738623, "learning_rate": 5.992433793241179e-06, "loss": 0.8315, "step": 259480 }, { "epoch": 4.555733071156446, "grad_norm": 0.05253155810244448, "learning_rate": 5.9893016880577056e-06, "loss": 0.8418, "step": 259490 }, { "epoch": 4.555908636036447, "grad_norm": 0.037676356200062676, "learning_rate": 5.986170786678007e-06, "loss": 0.8376, "step": 259500 }, { "epoch": 4.556084200916449, "grad_norm": 0.0474889695283454, "learning_rate": 5.983041089141715e-06, "loss": 0.8381, "step": 259510 }, { "epoch": 4.55625976579645, "grad_norm": 0.05350610133714206, "learning_rate": 5.9799125954884985e-06, "loss": 0.8469, "step": 259520 }, { "epoch": 4.556435330676451, "grad_norm": 0.050498990256105505, "learning_rate": 5.9767853057580255e-06, "loss": 0.8386, "step": 259530 }, { "epoch": 4.556610895556453, "grad_norm": 0.05579717583665096, "learning_rate": 5.973659219989863e-06, "loss": 0.8455, "step": 259540 }, { "epoch": 4.556786460436454, "grad_norm": 0.047855928435461666, "learning_rate": 5.970534338223667e-06, "loss": 0.8421, "step": 259550 }, { "epoch": 4.556962025316456, "grad_norm": 0.06152928240615446, "learning_rate": 5.9674106604990176e-06, "loss": 0.8351, "step": 259560 }, { "epoch": 4.557137590196457, "grad_norm": 0.05118113007659675, "learning_rate": 5.964288186855472e-06, "loss": 0.8396, "step": 259570 }, { "epoch": 4.557313155076459, "grad_norm": 0.06862592946548947, "learning_rate": 5.961166917332632e-06, "loss": 0.8337, "step": 259580 }, { "epoch": 4.55748871995646, "grad_norm": 0.07110519223543639, "learning_rate": 5.9580468519699994e-06, "loss": 0.8423, "step": 259590 }, { "epoch": 4.557664284836461, "grad_norm": 0.059993035534576786, "learning_rate": 5.954927990807142e-06, "loss": 0.8451, "step": 259600 }, { "epoch": 4.557839849716463, "grad_norm": 0.057351776356578614, "learning_rate": 5.951810333883564e-06, "loss": 0.8359, "step": 259610 }, { "epoch": 4.558015414596464, "grad_norm": 0.0915054062290721, "learning_rate": 5.9486938812387435e-06, "loss": 0.8355, "step": 259620 }, { "epoch": 4.558190979476466, "grad_norm": 0.04652606697655266, "learning_rate": 5.945578632912218e-06, "loss": 0.8433, "step": 259630 }, { "epoch": 4.558366544356467, "grad_norm": 0.06113396298067097, "learning_rate": 5.94246458894339e-06, "loss": 0.8371, "step": 259640 }, { "epoch": 4.558542109236468, "grad_norm": 0.05043048114466396, "learning_rate": 5.9393517493717946e-06, "loss": 0.8369, "step": 259650 }, { "epoch": 4.55871767411647, "grad_norm": 0.07003155924434468, "learning_rate": 5.936240114236803e-06, "loss": 0.8416, "step": 259660 }, { "epoch": 4.558893238996471, "grad_norm": 0.05524450149841554, "learning_rate": 5.933129683577883e-06, "loss": 0.8419, "step": 259670 }, { "epoch": 4.559068803876473, "grad_norm": 0.04731454740649935, "learning_rate": 5.9300204574344194e-06, "loss": 0.8399, "step": 259680 }, { "epoch": 4.559244368756474, "grad_norm": 0.05855033725146765, "learning_rate": 5.926912435845824e-06, "loss": 0.8423, "step": 259690 }, { "epoch": 4.559419933636475, "grad_norm": 0.12059557496529623, "learning_rate": 5.923805618851467e-06, "loss": 0.8378, "step": 259700 }, { "epoch": 4.559595498516477, "grad_norm": 0.04667376808141787, "learning_rate": 5.920700006490699e-06, "loss": 0.8427, "step": 259710 }, { "epoch": 4.559771063396478, "grad_norm": 0.056482859829440374, "learning_rate": 5.917595598802911e-06, "loss": 0.8411, "step": 259720 }, { "epoch": 4.55994662827648, "grad_norm": 0.05813660239062548, "learning_rate": 5.9144923958273855e-06, "loss": 0.8395, "step": 259730 }, { "epoch": 4.560122193156481, "grad_norm": 0.04655029611685512, "learning_rate": 5.911390397603472e-06, "loss": 0.8474, "step": 259740 }, { "epoch": 4.560297758036483, "grad_norm": 0.04267522967129286, "learning_rate": 5.9082896041704745e-06, "loss": 0.8335, "step": 259750 }, { "epoch": 4.560473322916484, "grad_norm": 0.049215249556880065, "learning_rate": 5.905190015567664e-06, "loss": 0.8409, "step": 259760 }, { "epoch": 4.560648887796485, "grad_norm": 0.07741266561894908, "learning_rate": 5.9020916318343355e-06, "loss": 0.834, "step": 259770 }, { "epoch": 4.560824452676487, "grad_norm": 0.05372163317538071, "learning_rate": 5.898994453009715e-06, "loss": 0.843, "step": 259780 }, { "epoch": 4.561000017556488, "grad_norm": 0.06836553403045616, "learning_rate": 5.8958984791330535e-06, "loss": 0.838, "step": 259790 }, { "epoch": 4.56117558243649, "grad_norm": 0.060770418414859986, "learning_rate": 5.892803710243612e-06, "loss": 0.8425, "step": 259800 }, { "epoch": 4.561351147316491, "grad_norm": 0.05486604393426086, "learning_rate": 5.889710146380539e-06, "loss": 0.8376, "step": 259810 }, { "epoch": 4.561526712196493, "grad_norm": 0.04571282076716153, "learning_rate": 5.886617787583096e-06, "loss": 0.8361, "step": 259820 }, { "epoch": 4.561702277076494, "grad_norm": 0.05835412689362552, "learning_rate": 5.883526633890423e-06, "loss": 0.8385, "step": 259830 }, { "epoch": 4.561877841956495, "grad_norm": 0.06142850726714548, "learning_rate": 5.880436685341704e-06, "loss": 0.8348, "step": 259840 }, { "epoch": 4.562053406836497, "grad_norm": 0.056516976099498475, "learning_rate": 5.877347941976067e-06, "loss": 0.8379, "step": 259850 }, { "epoch": 4.562228971716498, "grad_norm": 0.05821057668122707, "learning_rate": 5.874260403832674e-06, "loss": 0.8419, "step": 259860 }, { "epoch": 4.5624045365965, "grad_norm": 0.06136540846989266, "learning_rate": 5.871174070950633e-06, "loss": 0.8385, "step": 259870 }, { "epoch": 4.562580101476501, "grad_norm": 0.05896038880468855, "learning_rate": 5.868088943369026e-06, "loss": 0.84, "step": 259880 }, { "epoch": 4.562755666356502, "grad_norm": 0.048431324203224886, "learning_rate": 5.865005021126995e-06, "loss": 0.8376, "step": 259890 }, { "epoch": 4.562931231236504, "grad_norm": 0.04587101381573632, "learning_rate": 5.861922304263558e-06, "loss": 0.8444, "step": 259900 }, { "epoch": 4.563106796116505, "grad_norm": 0.04329191342743193, "learning_rate": 5.85884079281781e-06, "loss": 0.8367, "step": 259910 }, { "epoch": 4.5632823609965065, "grad_norm": 0.04840083729593486, "learning_rate": 5.855760486828792e-06, "loss": 0.839, "step": 259920 }, { "epoch": 4.563457925876508, "grad_norm": 0.04945505197100781, "learning_rate": 5.852681386335513e-06, "loss": 0.8379, "step": 259930 }, { "epoch": 4.563633490756509, "grad_norm": 0.05303354974528548, "learning_rate": 5.8496034913770024e-06, "loss": 0.8455, "step": 259940 }, { "epoch": 4.5638090556365105, "grad_norm": 0.05203008426170722, "learning_rate": 5.846526801992234e-06, "loss": 0.8397, "step": 259950 }, { "epoch": 4.5639846205165115, "grad_norm": 0.05037883400398477, "learning_rate": 5.8434513182202165e-06, "loss": 0.8426, "step": 259960 }, { "epoch": 4.5641601853965135, "grad_norm": 0.05174587287591693, "learning_rate": 5.840377040099924e-06, "loss": 0.8425, "step": 259970 }, { "epoch": 4.5643357502765145, "grad_norm": 0.06573207660199426, "learning_rate": 5.837303967670255e-06, "loss": 0.8351, "step": 259980 }, { "epoch": 4.564511315156516, "grad_norm": 0.056205678887750014, "learning_rate": 5.834232100970218e-06, "loss": 0.837, "step": 259990 }, { "epoch": 4.5646868800365175, "grad_norm": 0.04510443221945392, "learning_rate": 5.831161440038676e-06, "loss": 0.8456, "step": 260000 }, { "epoch": 4.564862444916519, "grad_norm": 0.04111026328811391, "learning_rate": 5.828091984914572e-06, "loss": 0.8406, "step": 260010 }, { "epoch": 4.56503800979652, "grad_norm": 0.05264323750378233, "learning_rate": 5.825023735636782e-06, "loss": 0.8396, "step": 260020 }, { "epoch": 4.5652135746765214, "grad_norm": 0.051180000244255335, "learning_rate": 5.821956692244171e-06, "loss": 0.8393, "step": 260030 }, { "epoch": 4.565389139556523, "grad_norm": 0.05839756918855802, "learning_rate": 5.818890854775626e-06, "loss": 0.8413, "step": 260040 }, { "epoch": 4.565564704436524, "grad_norm": 0.05204214139320574, "learning_rate": 5.8158262232699445e-06, "loss": 0.8315, "step": 260050 }, { "epoch": 4.565740269316526, "grad_norm": 0.048009328800942726, "learning_rate": 5.812762797766015e-06, "loss": 0.8372, "step": 260060 }, { "epoch": 4.565915834196527, "grad_norm": 0.0435609533119045, "learning_rate": 5.809700578302614e-06, "loss": 0.8344, "step": 260070 }, { "epoch": 4.566091399076528, "grad_norm": 0.05508855716753935, "learning_rate": 5.80663956491854e-06, "loss": 0.8363, "step": 260080 }, { "epoch": 4.56626696395653, "grad_norm": 0.048789713119665276, "learning_rate": 5.803579757652604e-06, "loss": 0.8357, "step": 260090 }, { "epoch": 4.566442528836531, "grad_norm": 0.04557847417552077, "learning_rate": 5.8005211565435485e-06, "loss": 0.8369, "step": 260100 }, { "epoch": 4.566618093716533, "grad_norm": 0.045716955668551724, "learning_rate": 5.797463761630141e-06, "loss": 0.8404, "step": 260110 }, { "epoch": 4.566793658596534, "grad_norm": 0.055495404730600856, "learning_rate": 5.794407572951103e-06, "loss": 0.8302, "step": 260120 }, { "epoch": 4.566969223476535, "grad_norm": 0.05975511515104652, "learning_rate": 5.791352590545178e-06, "loss": 0.8437, "step": 260130 }, { "epoch": 4.567144788356537, "grad_norm": 0.05086446898446266, "learning_rate": 5.7882988144510555e-06, "loss": 0.8368, "step": 260140 }, { "epoch": 4.567320353236538, "grad_norm": 0.06079318770575196, "learning_rate": 5.785246244707425e-06, "loss": 0.8404, "step": 260150 }, { "epoch": 4.56749591811654, "grad_norm": 0.04696315597790492, "learning_rate": 5.7821948813529856e-06, "loss": 0.8373, "step": 260160 }, { "epoch": 4.567671482996541, "grad_norm": 0.044833973031519855, "learning_rate": 5.779144724426383e-06, "loss": 0.8374, "step": 260170 }, { "epoch": 4.567847047876542, "grad_norm": 0.046225774442482714, "learning_rate": 5.776095773966273e-06, "loss": 0.8439, "step": 260180 }, { "epoch": 4.568022612756544, "grad_norm": 0.051976248093320625, "learning_rate": 5.773048030011268e-06, "loss": 0.8407, "step": 260190 }, { "epoch": 4.568198177636545, "grad_norm": 0.0471567032357954, "learning_rate": 5.7700014925999904e-06, "loss": 0.8381, "step": 260200 }, { "epoch": 4.568373742516547, "grad_norm": 0.06639301444856582, "learning_rate": 5.766956161771064e-06, "loss": 0.8382, "step": 260210 }, { "epoch": 4.568549307396548, "grad_norm": 0.05123168655164977, "learning_rate": 5.763912037563024e-06, "loss": 0.8401, "step": 260220 }, { "epoch": 4.56872487227655, "grad_norm": 0.052113740574395856, "learning_rate": 5.7608691200145046e-06, "loss": 0.8382, "step": 260230 }, { "epoch": 4.568900437156551, "grad_norm": 0.055454847619952466, "learning_rate": 5.7578274091640076e-06, "loss": 0.8379, "step": 260240 }, { "epoch": 4.569076002036553, "grad_norm": 0.05422100983372784, "learning_rate": 5.75478690505009e-06, "loss": 0.8373, "step": 260250 }, { "epoch": 4.569251566916554, "grad_norm": 0.04416213261994837, "learning_rate": 5.751747607711287e-06, "loss": 0.8354, "step": 260260 }, { "epoch": 4.569427131796555, "grad_norm": 0.05320371268926052, "learning_rate": 5.74870951718608e-06, "loss": 0.8378, "step": 260270 }, { "epoch": 4.569602696676557, "grad_norm": 0.05281328323303454, "learning_rate": 5.745672633512991e-06, "loss": 0.8368, "step": 260280 }, { "epoch": 4.569778261556558, "grad_norm": 0.07644337514668471, "learning_rate": 5.74263695673048e-06, "loss": 0.8389, "step": 260290 }, { "epoch": 4.56995382643656, "grad_norm": 0.049935541121361254, "learning_rate": 5.739602486877016e-06, "loss": 0.8391, "step": 260300 }, { "epoch": 4.570129391316561, "grad_norm": 0.04775725200573514, "learning_rate": 5.736569223991045e-06, "loss": 0.8362, "step": 260310 }, { "epoch": 4.570304956196562, "grad_norm": 0.06587688618628366, "learning_rate": 5.733537168110984e-06, "loss": 0.8386, "step": 260320 }, { "epoch": 4.570480521076564, "grad_norm": 0.0639672920310937, "learning_rate": 5.730506319275288e-06, "loss": 0.8429, "step": 260330 }, { "epoch": 4.570656085956565, "grad_norm": 0.0675525179006928, "learning_rate": 5.727476677522329e-06, "loss": 0.8416, "step": 260340 }, { "epoch": 4.570831650836567, "grad_norm": 0.05391727357958606, "learning_rate": 5.724448242890499e-06, "loss": 0.8403, "step": 260350 }, { "epoch": 4.571007215716568, "grad_norm": 0.047049543248466545, "learning_rate": 5.721421015418169e-06, "loss": 0.8334, "step": 260360 }, { "epoch": 4.571182780596569, "grad_norm": 0.05198565436853326, "learning_rate": 5.718394995143686e-06, "loss": 0.8423, "step": 260370 }, { "epoch": 4.571358345476571, "grad_norm": 0.05053279719364805, "learning_rate": 5.715370182105411e-06, "loss": 0.8402, "step": 260380 }, { "epoch": 4.571533910356572, "grad_norm": 0.04574154490394495, "learning_rate": 5.712346576341635e-06, "loss": 0.8431, "step": 260390 }, { "epoch": 4.571709475236574, "grad_norm": 0.06027303008679638, "learning_rate": 5.709324177890719e-06, "loss": 0.8312, "step": 260400 }, { "epoch": 4.571885040116575, "grad_norm": 0.049681323681907044, "learning_rate": 5.706302986790912e-06, "loss": 0.8376, "step": 260410 }, { "epoch": 4.572060604996577, "grad_norm": 0.0538793099131825, "learning_rate": 5.703283003080508e-06, "loss": 0.8346, "step": 260420 }, { "epoch": 4.572236169876578, "grad_norm": 0.046053285740096095, "learning_rate": 5.700264226797789e-06, "loss": 0.8335, "step": 260430 }, { "epoch": 4.572411734756579, "grad_norm": 0.05574316191098782, "learning_rate": 5.697246657980971e-06, "loss": 0.8456, "step": 260440 }, { "epoch": 4.572587299636581, "grad_norm": 0.04901277015552072, "learning_rate": 5.694230296668314e-06, "loss": 0.8422, "step": 260450 }, { "epoch": 4.572762864516582, "grad_norm": 0.04849111246306439, "learning_rate": 5.691215142898015e-06, "loss": 0.8447, "step": 260460 }, { "epoch": 4.572938429396584, "grad_norm": 0.07516373297964565, "learning_rate": 5.688201196708276e-06, "loss": 0.8356, "step": 260470 }, { "epoch": 4.573113994276585, "grad_norm": 0.053558684518552314, "learning_rate": 5.6851884581373155e-06, "loss": 0.8409, "step": 260480 }, { "epoch": 4.573289559156587, "grad_norm": 0.05783665783449839, "learning_rate": 5.682176927223261e-06, "loss": 0.8416, "step": 260490 }, { "epoch": 4.573465124036588, "grad_norm": 0.04518310450602686, "learning_rate": 5.679166604004307e-06, "loss": 0.8453, "step": 260500 }, { "epoch": 4.573640688916589, "grad_norm": 0.052777001085918186, "learning_rate": 5.676157488518584e-06, "loss": 0.8318, "step": 260510 }, { "epoch": 4.573816253796591, "grad_norm": 0.0524946401886517, "learning_rate": 5.673149580804218e-06, "loss": 0.8374, "step": 260520 }, { "epoch": 4.573991818676592, "grad_norm": 0.045016386408987134, "learning_rate": 5.670142880899295e-06, "loss": 0.8414, "step": 260530 }, { "epoch": 4.574167383556594, "grad_norm": 0.05009107213653581, "learning_rate": 5.6671373888419414e-06, "loss": 0.8375, "step": 260540 }, { "epoch": 4.574342948436595, "grad_norm": 0.04803440327640358, "learning_rate": 5.664133104670245e-06, "loss": 0.836, "step": 260550 }, { "epoch": 4.574518513316596, "grad_norm": 0.06052962120610911, "learning_rate": 5.661130028422221e-06, "loss": 0.8418, "step": 260560 }, { "epoch": 4.574694078196598, "grad_norm": 0.046496818669047445, "learning_rate": 5.658128160135968e-06, "loss": 0.8366, "step": 260570 }, { "epoch": 4.574869643076599, "grad_norm": 0.04926835832103773, "learning_rate": 5.655127499849503e-06, "loss": 0.8389, "step": 260580 }, { "epoch": 4.575045207956601, "grad_norm": 0.047998383148685604, "learning_rate": 5.652128047600845e-06, "loss": 0.8405, "step": 260590 }, { "epoch": 4.575220772836602, "grad_norm": 0.062431703154047176, "learning_rate": 5.649129803428012e-06, "loss": 0.8441, "step": 260600 }, { "epoch": 4.575396337716603, "grad_norm": 0.041797479572235015, "learning_rate": 5.6461327673689565e-06, "loss": 0.8363, "step": 260610 }, { "epoch": 4.575571902596605, "grad_norm": 0.04492683427460048, "learning_rate": 5.643136939461687e-06, "loss": 0.8368, "step": 260620 }, { "epoch": 4.575747467476606, "grad_norm": 0.05544142129049827, "learning_rate": 5.640142319744145e-06, "loss": 0.8371, "step": 260630 }, { "epoch": 4.575923032356608, "grad_norm": 0.053793075674301694, "learning_rate": 5.637148908254272e-06, "loss": 0.8354, "step": 260640 }, { "epoch": 4.576098597236609, "grad_norm": 0.061829541878660174, "learning_rate": 5.634156705029999e-06, "loss": 0.8398, "step": 260650 }, { "epoch": 4.5762741621166105, "grad_norm": 0.05746092598315417, "learning_rate": 5.631165710109222e-06, "loss": 0.8347, "step": 260660 }, { "epoch": 4.576449726996612, "grad_norm": 0.05027338441307556, "learning_rate": 5.628175923529885e-06, "loss": 0.8326, "step": 260670 }, { "epoch": 4.5766252918766135, "grad_norm": 0.07218786187556964, "learning_rate": 5.625187345329817e-06, "loss": 0.8415, "step": 260680 }, { "epoch": 4.5768008567566145, "grad_norm": 0.07088710036086782, "learning_rate": 5.622199975546908e-06, "loss": 0.8446, "step": 260690 }, { "epoch": 4.5769764216366156, "grad_norm": 0.0451096606310002, "learning_rate": 5.6192138142189985e-06, "loss": 0.837, "step": 260700 }, { "epoch": 4.5771519865166175, "grad_norm": 0.04867928677439403, "learning_rate": 5.616228861383932e-06, "loss": 0.8349, "step": 260710 }, { "epoch": 4.5773275513966185, "grad_norm": 0.05602714622498141, "learning_rate": 5.61324511707954e-06, "loss": 0.8317, "step": 260720 }, { "epoch": 4.57750311627662, "grad_norm": 0.05197886395653192, "learning_rate": 5.610262581343588e-06, "loss": 0.8393, "step": 260730 }, { "epoch": 4.5776786811566215, "grad_norm": 0.044981952624113294, "learning_rate": 5.6072812542139085e-06, "loss": 0.8357, "step": 260740 }, { "epoch": 4.5778542460366225, "grad_norm": 0.044301617574425536, "learning_rate": 5.6043011357282555e-06, "loss": 0.8467, "step": 260750 }, { "epoch": 4.578029810916624, "grad_norm": 0.04803220795092703, "learning_rate": 5.6013222259243845e-06, "loss": 0.8335, "step": 260760 }, { "epoch": 4.5782053757966255, "grad_norm": 0.04939921471546582, "learning_rate": 5.5983445248400496e-06, "loss": 0.8405, "step": 260770 }, { "epoch": 4.578380940676627, "grad_norm": 0.05216337330491002, "learning_rate": 5.595368032512974e-06, "loss": 0.8414, "step": 260780 }, { "epoch": 4.578556505556628, "grad_norm": 0.04855345668985025, "learning_rate": 5.592392748980877e-06, "loss": 0.8372, "step": 260790 }, { "epoch": 4.578732070436629, "grad_norm": 0.046222495519975065, "learning_rate": 5.5894186742814275e-06, "loss": 0.8332, "step": 260800 }, { "epoch": 4.578907635316631, "grad_norm": 0.05336316054928661, "learning_rate": 5.5864458084523365e-06, "loss": 0.8403, "step": 260810 }, { "epoch": 4.579083200196632, "grad_norm": 0.04995009682370866, "learning_rate": 5.58347415153128e-06, "loss": 0.8401, "step": 260820 }, { "epoch": 4.579258765076634, "grad_norm": 0.051384326189187195, "learning_rate": 5.580503703555862e-06, "loss": 0.8396, "step": 260830 }, { "epoch": 4.579434329956635, "grad_norm": 0.05116121393796459, "learning_rate": 5.577534464563767e-06, "loss": 0.8394, "step": 260840 }, { "epoch": 4.579609894836636, "grad_norm": 0.05859434319731726, "learning_rate": 5.5745664345926e-06, "loss": 0.8416, "step": 260850 }, { "epoch": 4.579785459716638, "grad_norm": 0.043095294415940705, "learning_rate": 5.57159961367996e-06, "loss": 0.8417, "step": 260860 }, { "epoch": 4.579961024596639, "grad_norm": 0.05969538537577336, "learning_rate": 5.568634001863448e-06, "loss": 0.8371, "step": 260870 }, { "epoch": 4.580136589476641, "grad_norm": 0.06885761616278054, "learning_rate": 5.565669599180622e-06, "loss": 0.8415, "step": 260880 }, { "epoch": 4.580312154356642, "grad_norm": 0.046109495707382875, "learning_rate": 5.562706405669059e-06, "loss": 0.8405, "step": 260890 }, { "epoch": 4.580487719236644, "grad_norm": 0.04746156634874245, "learning_rate": 5.5597444213662846e-06, "loss": 0.8348, "step": 260900 }, { "epoch": 4.580663284116645, "grad_norm": 0.06640127780695726, "learning_rate": 5.556783646309855e-06, "loss": 0.8409, "step": 260910 }, { "epoch": 4.580838848996647, "grad_norm": 0.051953993820343214, "learning_rate": 5.553824080537261e-06, "loss": 0.8401, "step": 260920 }, { "epoch": 4.581014413876648, "grad_norm": 0.041957730093034225, "learning_rate": 5.550865724086006e-06, "loss": 0.8447, "step": 260930 }, { "epoch": 4.581189978756649, "grad_norm": 0.051330990796286904, "learning_rate": 5.5479085769935904e-06, "loss": 0.8351, "step": 260940 }, { "epoch": 4.581365543636651, "grad_norm": 0.05066504071073231, "learning_rate": 5.544952639297452e-06, "loss": 0.8401, "step": 260950 }, { "epoch": 4.581541108516652, "grad_norm": 0.04572979650038626, "learning_rate": 5.541997911035058e-06, "loss": 0.8425, "step": 260960 }, { "epoch": 4.581716673396654, "grad_norm": 0.04925079666008366, "learning_rate": 5.5390443922438455e-06, "loss": 0.8406, "step": 260970 }, { "epoch": 4.581892238276655, "grad_norm": 0.0546560375813379, "learning_rate": 5.536092082961239e-06, "loss": 0.8364, "step": 260980 }, { "epoch": 4.582067803156656, "grad_norm": 0.06390965600966374, "learning_rate": 5.533140983224641e-06, "loss": 0.833, "step": 260990 }, { "epoch": 4.582243368036658, "grad_norm": 0.04877339924278053, "learning_rate": 5.530191093071435e-06, "loss": 0.8412, "step": 261000 }, { "epoch": 4.582418932916659, "grad_norm": 0.04356293732649791, "learning_rate": 5.527242412539011e-06, "loss": 0.8369, "step": 261010 }, { "epoch": 4.582594497796661, "grad_norm": 0.11020853702765955, "learning_rate": 5.5242949416647285e-06, "loss": 0.8301, "step": 261020 }, { "epoch": 4.582770062676662, "grad_norm": 0.04891444650190832, "learning_rate": 5.5213486804859245e-06, "loss": 0.831, "step": 261030 }, { "epoch": 4.582945627556663, "grad_norm": 0.053811021422815235, "learning_rate": 5.518403629039926e-06, "loss": 0.8429, "step": 261040 }, { "epoch": 4.583121192436665, "grad_norm": 0.06212583897341815, "learning_rate": 5.515459787364047e-06, "loss": 0.8322, "step": 261050 }, { "epoch": 4.583296757316666, "grad_norm": 0.08135216745540368, "learning_rate": 5.512517155495615e-06, "loss": 0.833, "step": 261060 }, { "epoch": 4.583472322196668, "grad_norm": 0.0582254058197225, "learning_rate": 5.5095757334718555e-06, "loss": 0.8371, "step": 261070 }, { "epoch": 4.583647887076669, "grad_norm": 0.04525898633776517, "learning_rate": 5.506635521330096e-06, "loss": 0.8374, "step": 261080 }, { "epoch": 4.583823451956671, "grad_norm": 0.045161143593001885, "learning_rate": 5.503696519107564e-06, "loss": 0.8386, "step": 261090 }, { "epoch": 4.583999016836672, "grad_norm": 0.054478239876847384, "learning_rate": 5.500758726841484e-06, "loss": 0.8355, "step": 261100 }, { "epoch": 4.584174581716673, "grad_norm": 0.03987485641158511, "learning_rate": 5.4978221445691206e-06, "loss": 0.8404, "step": 261110 }, { "epoch": 4.584350146596675, "grad_norm": 0.06884366267151301, "learning_rate": 5.494886772327632e-06, "loss": 0.8368, "step": 261120 }, { "epoch": 4.584525711476676, "grad_norm": 0.08975631076551997, "learning_rate": 5.4919526101542455e-06, "loss": 0.8258, "step": 261130 }, { "epoch": 4.584701276356678, "grad_norm": 0.06578671920753462, "learning_rate": 5.489019658086112e-06, "loss": 0.8357, "step": 261140 }, { "epoch": 4.584876841236679, "grad_norm": 0.042872253333685155, "learning_rate": 5.486087916160405e-06, "loss": 0.8475, "step": 261150 }, { "epoch": 4.585052406116681, "grad_norm": 0.06270528070503455, "learning_rate": 5.483157384414283e-06, "loss": 0.8435, "step": 261160 }, { "epoch": 4.585227970996682, "grad_norm": 0.06330228248123161, "learning_rate": 5.480228062884833e-06, "loss": 0.8376, "step": 261170 }, { "epoch": 4.585403535876683, "grad_norm": 0.06205345793009414, "learning_rate": 5.4772999516092255e-06, "loss": 0.8417, "step": 261180 }, { "epoch": 4.585579100756685, "grad_norm": 0.0612597448306351, "learning_rate": 5.474373050624523e-06, "loss": 0.8383, "step": 261190 }, { "epoch": 4.585754665636686, "grad_norm": 0.04542710277092864, "learning_rate": 5.471447359967844e-06, "loss": 0.8426, "step": 261200 }, { "epoch": 4.585930230516688, "grad_norm": 0.09089939142432851, "learning_rate": 5.468522879676207e-06, "loss": 0.8297, "step": 261210 }, { "epoch": 4.586105795396689, "grad_norm": 0.04688301939907682, "learning_rate": 5.465599609786706e-06, "loss": 0.8377, "step": 261220 }, { "epoch": 4.58628136027669, "grad_norm": 0.06191464704347124, "learning_rate": 5.46267755033637e-06, "loss": 0.8427, "step": 261230 }, { "epoch": 4.586456925156692, "grad_norm": 0.05199584538448237, "learning_rate": 5.4597567013621965e-06, "loss": 0.8351, "step": 261240 }, { "epoch": 4.586632490036693, "grad_norm": 0.04372234658373586, "learning_rate": 5.456837062901237e-06, "loss": 0.84, "step": 261250 }, { "epoch": 4.586808054916695, "grad_norm": 0.04507521758211286, "learning_rate": 5.453918634990455e-06, "loss": 0.8387, "step": 261260 }, { "epoch": 4.586983619796696, "grad_norm": 0.05736018074111804, "learning_rate": 5.4510014176668355e-06, "loss": 0.8408, "step": 261270 }, { "epoch": 4.587159184676697, "grad_norm": 0.046175838597318705, "learning_rate": 5.448085410967353e-06, "loss": 0.843, "step": 261280 }, { "epoch": 4.587334749556699, "grad_norm": 0.04768990774115036, "learning_rate": 5.445170614928926e-06, "loss": 0.8346, "step": 261290 }, { "epoch": 4.5875103144367, "grad_norm": 0.05004968271774163, "learning_rate": 5.442257029588519e-06, "loss": 0.8421, "step": 261300 }, { "epoch": 4.587685879316702, "grad_norm": 0.055895407734581626, "learning_rate": 5.439344654983008e-06, "loss": 0.8455, "step": 261310 }, { "epoch": 4.587861444196703, "grad_norm": 0.04474108403478276, "learning_rate": 5.436433491149322e-06, "loss": 0.839, "step": 261320 }, { "epoch": 4.588037009076705, "grad_norm": 0.05295484747438008, "learning_rate": 5.43352353812435e-06, "loss": 0.8446, "step": 261330 }, { "epoch": 4.588212573956706, "grad_norm": 0.06042197190010765, "learning_rate": 5.430614795944919e-06, "loss": 0.8444, "step": 261340 }, { "epoch": 4.588388138836708, "grad_norm": 0.05476079585650681, "learning_rate": 5.427707264647943e-06, "loss": 0.838, "step": 261350 }, { "epoch": 4.588563703716709, "grad_norm": 0.052470474905806475, "learning_rate": 5.424800944270216e-06, "loss": 0.8389, "step": 261360 }, { "epoch": 4.58873926859671, "grad_norm": 0.05022561099222604, "learning_rate": 5.4218958348485945e-06, "loss": 0.8433, "step": 261370 }, { "epoch": 4.588914833476712, "grad_norm": 0.051891665132704656, "learning_rate": 5.4189919364198536e-06, "loss": 0.8436, "step": 261380 }, { "epoch": 4.589090398356713, "grad_norm": 0.06352308847681294, "learning_rate": 5.416089249020803e-06, "loss": 0.8432, "step": 261390 }, { "epoch": 4.5892659632367145, "grad_norm": 0.05905881554031981, "learning_rate": 5.413187772688242e-06, "loss": 0.84, "step": 261400 }, { "epoch": 4.589441528116716, "grad_norm": 0.04836234182212396, "learning_rate": 5.41028750745888e-06, "loss": 0.8372, "step": 261410 }, { "epoch": 4.589617092996717, "grad_norm": 0.07379331248016238, "learning_rate": 5.407388453369518e-06, "loss": 0.8341, "step": 261420 }, { "epoch": 4.5897926578767185, "grad_norm": 0.049465258169569884, "learning_rate": 5.4044906104568524e-06, "loss": 0.837, "step": 261430 }, { "epoch": 4.58996822275672, "grad_norm": 0.04990745171611921, "learning_rate": 5.401593978757618e-06, "loss": 0.8421, "step": 261440 }, { "epoch": 4.5901437876367215, "grad_norm": 0.052064588421338195, "learning_rate": 5.398698558308513e-06, "loss": 0.8352, "step": 261450 }, { "epoch": 4.5903193525167225, "grad_norm": 0.042675080524525295, "learning_rate": 5.395804349146205e-06, "loss": 0.836, "step": 261460 }, { "epoch": 4.5904949173967236, "grad_norm": 0.05880777043553095, "learning_rate": 5.392911351307394e-06, "loss": 0.8367, "step": 261470 }, { "epoch": 4.5906704822767255, "grad_norm": 0.058244423224309304, "learning_rate": 5.390019564828714e-06, "loss": 0.8388, "step": 261480 }, { "epoch": 4.5908460471567265, "grad_norm": 0.043041186786967535, "learning_rate": 5.3871289897468074e-06, "loss": 0.83, "step": 261490 }, { "epoch": 4.591021612036728, "grad_norm": 0.05435934218712979, "learning_rate": 5.38423962609831e-06, "loss": 0.8509, "step": 261500 }, { "epoch": 4.5911971769167295, "grad_norm": 0.06367198489612162, "learning_rate": 5.381351473919788e-06, "loss": 0.833, "step": 261510 }, { "epoch": 4.5913727417967305, "grad_norm": 0.4894884722619946, "learning_rate": 5.378464533247911e-06, "loss": 0.842, "step": 261520 }, { "epoch": 4.591548306676732, "grad_norm": 0.056647543700587506, "learning_rate": 5.375578804119188e-06, "loss": 0.8438, "step": 261530 }, { "epoch": 4.5917238715567334, "grad_norm": 0.059134882452687006, "learning_rate": 5.372694286570223e-06, "loss": 0.8368, "step": 261540 }, { "epoch": 4.591899436436735, "grad_norm": 0.05202157988126208, "learning_rate": 5.369810980637537e-06, "loss": 0.8349, "step": 261550 }, { "epoch": 4.592075001316736, "grad_norm": 0.05063056872337115, "learning_rate": 5.366928886357678e-06, "loss": 0.8389, "step": 261560 }, { "epoch": 4.592250566196738, "grad_norm": 0.06274773169934673, "learning_rate": 5.364048003767157e-06, "loss": 0.8359, "step": 261570 }, { "epoch": 4.592426131076739, "grad_norm": 0.04851988354739308, "learning_rate": 5.3611683329024665e-06, "loss": 0.8279, "step": 261580 }, { "epoch": 4.592601695956741, "grad_norm": 0.0629674767099638, "learning_rate": 5.358289873800118e-06, "loss": 0.8344, "step": 261590 }, { "epoch": 4.592777260836742, "grad_norm": 0.05504774831232978, "learning_rate": 5.355412626496559e-06, "loss": 0.8458, "step": 261600 }, { "epoch": 4.592952825716743, "grad_norm": 0.05086075851796276, "learning_rate": 5.352536591028247e-06, "loss": 0.8429, "step": 261610 }, { "epoch": 4.593128390596745, "grad_norm": 0.05318375489393171, "learning_rate": 5.349661767431651e-06, "loss": 0.8394, "step": 261620 }, { "epoch": 4.593303955476746, "grad_norm": 0.05109199823646804, "learning_rate": 5.346788155743152e-06, "loss": 0.8339, "step": 261630 }, { "epoch": 4.593479520356748, "grad_norm": 0.05060357423228389, "learning_rate": 5.343915755999186e-06, "loss": 0.839, "step": 261640 }, { "epoch": 4.593655085236749, "grad_norm": 0.04726535667380729, "learning_rate": 5.3410445682361325e-06, "loss": 0.8429, "step": 261650 }, { "epoch": 4.59383065011675, "grad_norm": 0.06144764670731672, "learning_rate": 5.338174592490375e-06, "loss": 0.8401, "step": 261660 }, { "epoch": 4.594006214996752, "grad_norm": 0.04120858648609949, "learning_rate": 5.335305828798292e-06, "loss": 0.8326, "step": 261670 }, { "epoch": 4.594181779876753, "grad_norm": 0.04722283323615609, "learning_rate": 5.3324382771962e-06, "loss": 0.8362, "step": 261680 }, { "epoch": 4.594357344756755, "grad_norm": 0.05064025788207252, "learning_rate": 5.3295719377204675e-06, "loss": 0.8479, "step": 261690 }, { "epoch": 4.594532909636756, "grad_norm": 0.04931411284425824, "learning_rate": 5.326706810407377e-06, "loss": 0.8482, "step": 261700 }, { "epoch": 4.594708474516757, "grad_norm": 0.05258786462744108, "learning_rate": 5.3238428952932665e-06, "loss": 0.8393, "step": 261710 }, { "epoch": 4.594884039396759, "grad_norm": 0.052785567101311354, "learning_rate": 5.320980192414384e-06, "loss": 0.8471, "step": 261720 }, { "epoch": 4.59505960427676, "grad_norm": 0.05487056716997818, "learning_rate": 5.3181187018070334e-06, "loss": 0.8397, "step": 261730 }, { "epoch": 4.595235169156762, "grad_norm": 0.05973123982192329, "learning_rate": 5.3152584235074534e-06, "loss": 0.8327, "step": 261740 }, { "epoch": 4.595410734036763, "grad_norm": 0.04647689758099906, "learning_rate": 5.31239935755187e-06, "loss": 0.8366, "step": 261750 }, { "epoch": 4.595586298916765, "grad_norm": 0.04055038081119253, "learning_rate": 5.309541503976545e-06, "loss": 0.8355, "step": 261760 }, { "epoch": 4.595761863796766, "grad_norm": 0.07272633034976074, "learning_rate": 5.306684862817659e-06, "loss": 0.8456, "step": 261770 }, { "epoch": 4.595937428676767, "grad_norm": 0.05343729401010844, "learning_rate": 5.303829434111419e-06, "loss": 0.8267, "step": 261780 }, { "epoch": 4.596112993556769, "grad_norm": 0.05439407485408468, "learning_rate": 5.300975217894008e-06, "loss": 0.8382, "step": 261790 }, { "epoch": 4.59628855843677, "grad_norm": 0.06074120099565308, "learning_rate": 5.298122214201574e-06, "loss": 0.8331, "step": 261800 }, { "epoch": 4.596464123316772, "grad_norm": 0.04942280604853766, "learning_rate": 5.29527042307028e-06, "loss": 0.8272, "step": 261810 }, { "epoch": 4.596639688196773, "grad_norm": 0.07027951276899684, "learning_rate": 5.292419844536232e-06, "loss": 0.8349, "step": 261820 }, { "epoch": 4.596815253076775, "grad_norm": 0.04930440915048122, "learning_rate": 5.289570478635579e-06, "loss": 0.8389, "step": 261830 }, { "epoch": 4.596990817956776, "grad_norm": 0.07960294656758801, "learning_rate": 5.286722325404417e-06, "loss": 0.8456, "step": 261840 }, { "epoch": 4.597166382836777, "grad_norm": 0.04235309073935361, "learning_rate": 5.2838753848788085e-06, "loss": 0.8384, "step": 261850 }, { "epoch": 4.597341947716779, "grad_norm": 0.06930937031068539, "learning_rate": 5.281029657094859e-06, "loss": 0.8425, "step": 261860 }, { "epoch": 4.59751751259678, "grad_norm": 0.05348690020540727, "learning_rate": 5.278185142088597e-06, "loss": 0.8365, "step": 261870 }, { "epoch": 4.597693077476782, "grad_norm": 0.048901616032612984, "learning_rate": 5.2753418398960964e-06, "loss": 0.8374, "step": 261880 }, { "epoch": 4.597868642356783, "grad_norm": 0.05132998055420026, "learning_rate": 5.272499750553343e-06, "loss": 0.8371, "step": 261890 }, { "epoch": 4.598044207236784, "grad_norm": 0.046157037657295626, "learning_rate": 5.269658874096355e-06, "loss": 0.835, "step": 261900 }, { "epoch": 4.598219772116786, "grad_norm": 0.048640766486540525, "learning_rate": 5.26681921056116e-06, "loss": 0.8303, "step": 261910 }, { "epoch": 4.598395336996787, "grad_norm": 0.04594977283853093, "learning_rate": 5.263980759983679e-06, "loss": 0.8396, "step": 261920 }, { "epoch": 4.598570901876789, "grad_norm": 0.0605704046153225, "learning_rate": 5.261143522399941e-06, "loss": 0.8404, "step": 261930 }, { "epoch": 4.59874646675679, "grad_norm": 0.05164178234943895, "learning_rate": 5.258307497845852e-06, "loss": 0.8407, "step": 261940 }, { "epoch": 4.598922031636791, "grad_norm": 0.042371515768627695, "learning_rate": 5.255472686357345e-06, "loss": 0.83, "step": 261950 }, { "epoch": 4.599097596516793, "grad_norm": 0.0562500618434656, "learning_rate": 5.252639087970371e-06, "loss": 0.8377, "step": 261960 }, { "epoch": 4.599273161396794, "grad_norm": 0.05226137731652457, "learning_rate": 5.249806702720795e-06, "loss": 0.8395, "step": 261970 }, { "epoch": 4.599448726276796, "grad_norm": 0.06784334474701646, "learning_rate": 5.246975530644524e-06, "loss": 0.8381, "step": 261980 }, { "epoch": 4.599624291156797, "grad_norm": 0.0515398444979551, "learning_rate": 5.2441455717774125e-06, "loss": 0.8371, "step": 261990 }, { "epoch": 4.599799856036799, "grad_norm": 0.05665140546975422, "learning_rate": 5.241316826155336e-06, "loss": 0.8438, "step": 262000 }, { "epoch": 4.5999754209168, "grad_norm": 0.044614239420091795, "learning_rate": 5.238489293814136e-06, "loss": 0.8374, "step": 262010 }, { "epoch": 4.600150985796802, "grad_norm": 0.05502169815144073, "learning_rate": 5.2356629747896095e-06, "loss": 0.8391, "step": 262020 }, { "epoch": 4.600326550676803, "grad_norm": 0.06209861017228469, "learning_rate": 5.232837869117602e-06, "loss": 0.8396, "step": 262030 }, { "epoch": 4.600502115556804, "grad_norm": 0.04312918294487879, "learning_rate": 5.2300139768338885e-06, "loss": 0.8406, "step": 262040 }, { "epoch": 4.600677680436806, "grad_norm": 0.0697830520980734, "learning_rate": 5.227191297974255e-06, "loss": 0.8326, "step": 262050 }, { "epoch": 4.600853245316807, "grad_norm": 0.047280217910018324, "learning_rate": 5.224369832574459e-06, "loss": 0.8397, "step": 262060 }, { "epoch": 4.601028810196809, "grad_norm": 0.04476938515267581, "learning_rate": 5.221549580670252e-06, "loss": 0.8377, "step": 262070 }, { "epoch": 4.60120437507681, "grad_norm": 0.05178397305072568, "learning_rate": 5.218730542297379e-06, "loss": 0.8362, "step": 262080 }, { "epoch": 4.601379939956811, "grad_norm": 0.05969399529703824, "learning_rate": 5.215912717491515e-06, "loss": 0.8351, "step": 262090 }, { "epoch": 4.601555504836813, "grad_norm": 0.05500071598341896, "learning_rate": 5.213096106288418e-06, "loss": 0.8342, "step": 262100 }, { "epoch": 4.601731069716814, "grad_norm": 0.057477159935514825, "learning_rate": 5.210280708723753e-06, "loss": 0.8363, "step": 262110 }, { "epoch": 4.601906634596816, "grad_norm": 0.04953224786202924, "learning_rate": 5.207466524833174e-06, "loss": 0.832, "step": 262120 }, { "epoch": 4.602082199476817, "grad_norm": 0.05847043068187543, "learning_rate": 5.204653554652371e-06, "loss": 0.8412, "step": 262130 }, { "epoch": 4.602257764356818, "grad_norm": 0.044493148635570176, "learning_rate": 5.201841798216956e-06, "loss": 0.8374, "step": 262140 }, { "epoch": 4.60243332923682, "grad_norm": 0.05181689950082289, "learning_rate": 5.1990312555625714e-06, "loss": 0.8389, "step": 262150 }, { "epoch": 4.602608894116821, "grad_norm": 0.0580517350789721, "learning_rate": 5.19622192672482e-06, "loss": 0.839, "step": 262160 }, { "epoch": 4.6027844589968225, "grad_norm": 0.04856664243933699, "learning_rate": 5.193413811739291e-06, "loss": 0.8399, "step": 262170 }, { "epoch": 4.602960023876824, "grad_norm": 0.05158850295190827, "learning_rate": 5.190606910641583e-06, "loss": 0.8309, "step": 262180 }, { "epoch": 4.603135588756825, "grad_norm": 0.0388385725233393, "learning_rate": 5.187801223467221e-06, "loss": 0.841, "step": 262190 }, { "epoch": 4.6033111536368265, "grad_norm": 0.055583810929912165, "learning_rate": 5.184996750251806e-06, "loss": 0.8348, "step": 262200 }, { "epoch": 4.603486718516828, "grad_norm": 0.0504311099503771, "learning_rate": 5.182193491030827e-06, "loss": 0.8383, "step": 262210 }, { "epoch": 4.6036622833968295, "grad_norm": 0.05084659149034944, "learning_rate": 5.179391445839843e-06, "loss": 0.8425, "step": 262220 }, { "epoch": 4.6038378482768305, "grad_norm": 0.04494099700107037, "learning_rate": 5.1765906147143086e-06, "loss": 0.842, "step": 262230 }, { "epoch": 4.604013413156832, "grad_norm": 0.04519878421919736, "learning_rate": 5.17379099768975e-06, "loss": 0.8389, "step": 262240 }, { "epoch": 4.6041889780368335, "grad_norm": 0.06774205097646795, "learning_rate": 5.170992594801623e-06, "loss": 0.8482, "step": 262250 }, { "epoch": 4.604364542916835, "grad_norm": 0.06497335966751934, "learning_rate": 5.168195406085364e-06, "loss": 0.8363, "step": 262260 }, { "epoch": 4.604540107796836, "grad_norm": 0.05619169930963423, "learning_rate": 5.165399431576465e-06, "loss": 0.8422, "step": 262270 }, { "epoch": 4.6047156726768375, "grad_norm": 0.04990488690517049, "learning_rate": 5.162604671310304e-06, "loss": 0.8458, "step": 262280 }, { "epoch": 4.604891237556839, "grad_norm": 0.05024322971854726, "learning_rate": 5.159811125322319e-06, "loss": 0.844, "step": 262290 }, { "epoch": 4.60506680243684, "grad_norm": 0.042978243655637766, "learning_rate": 5.1570187936478995e-06, "loss": 0.8385, "step": 262300 }, { "epoch": 4.605242367316842, "grad_norm": 0.07319919437733072, "learning_rate": 5.154227676322406e-06, "loss": 0.8371, "step": 262310 }, { "epoch": 4.605417932196843, "grad_norm": 0.059097121966824914, "learning_rate": 5.15143777338123e-06, "loss": 0.839, "step": 262320 }, { "epoch": 4.605593497076844, "grad_norm": 0.05311824750491535, "learning_rate": 5.148649084859696e-06, "loss": 0.8392, "step": 262330 }, { "epoch": 4.605769061956846, "grad_norm": 0.04885734624442374, "learning_rate": 5.145861610793154e-06, "loss": 0.8446, "step": 262340 }, { "epoch": 4.605944626836847, "grad_norm": 0.053978751727948165, "learning_rate": 5.143075351216916e-06, "loss": 0.8401, "step": 262350 }, { "epoch": 4.606120191716849, "grad_norm": 0.04899067526599813, "learning_rate": 5.140290306166266e-06, "loss": 0.8341, "step": 262360 }, { "epoch": 4.60629575659685, "grad_norm": 0.044814505905747794, "learning_rate": 5.137506475676539e-06, "loss": 0.8349, "step": 262370 }, { "epoch": 4.606471321476851, "grad_norm": 0.0567145014894231, "learning_rate": 5.134723859782952e-06, "loss": 0.8373, "step": 262380 }, { "epoch": 4.606646886356853, "grad_norm": 0.06982290778014882, "learning_rate": 5.131942458520808e-06, "loss": 0.8357, "step": 262390 }, { "epoch": 4.606822451236854, "grad_norm": 0.04626039203348567, "learning_rate": 5.129162271925312e-06, "loss": 0.838, "step": 262400 }, { "epoch": 4.606998016116856, "grad_norm": 0.04781626454207249, "learning_rate": 5.126383300031703e-06, "loss": 0.8374, "step": 262410 }, { "epoch": 4.607173580996857, "grad_norm": 0.0910695915867181, "learning_rate": 5.123605542875197e-06, "loss": 0.8343, "step": 262420 }, { "epoch": 4.607349145876859, "grad_norm": 0.05202094837408082, "learning_rate": 5.120829000490974e-06, "loss": 0.8414, "step": 262430 }, { "epoch": 4.60752471075686, "grad_norm": 0.05495286294745212, "learning_rate": 5.1180536729142304e-06, "loss": 0.8404, "step": 262440 }, { "epoch": 4.607700275636861, "grad_norm": 0.05312328069778979, "learning_rate": 5.115279560180125e-06, "loss": 0.8383, "step": 262450 }, { "epoch": 4.607875840516863, "grad_norm": 0.051559916420477137, "learning_rate": 5.112506662323811e-06, "loss": 0.847, "step": 262460 }, { "epoch": 4.608051405396864, "grad_norm": 0.052100321838481733, "learning_rate": 5.109734979380391e-06, "loss": 0.8386, "step": 262470 }, { "epoch": 4.608226970276866, "grad_norm": 0.07307196918693339, "learning_rate": 5.106964511385016e-06, "loss": 0.8411, "step": 262480 }, { "epoch": 4.608402535156867, "grad_norm": 0.04932808666309443, "learning_rate": 5.1041952583727925e-06, "loss": 0.8405, "step": 262490 }, { "epoch": 4.608578100036869, "grad_norm": 0.043303937756778946, "learning_rate": 5.101427220378782e-06, "loss": 0.8345, "step": 262500 }, { "epoch": 4.60875366491687, "grad_norm": 0.05384699597086268, "learning_rate": 5.098660397438068e-06, "loss": 0.8428, "step": 262510 }, { "epoch": 4.608929229796871, "grad_norm": 0.07842587886635578, "learning_rate": 5.095894789585714e-06, "loss": 0.8469, "step": 262520 }, { "epoch": 4.609104794676873, "grad_norm": 0.04178168786068841, "learning_rate": 5.093130396856726e-06, "loss": 0.8415, "step": 262530 }, { "epoch": 4.609280359556874, "grad_norm": 0.05524887552383218, "learning_rate": 5.090367219286186e-06, "loss": 0.8356, "step": 262540 }, { "epoch": 4.609455924436876, "grad_norm": 0.059085358925174705, "learning_rate": 5.08760525690906e-06, "loss": 0.8363, "step": 262550 }, { "epoch": 4.609631489316877, "grad_norm": 0.05098154862053491, "learning_rate": 5.084844509760366e-06, "loss": 0.8407, "step": 262560 }, { "epoch": 4.609807054196878, "grad_norm": 0.046775108907413, "learning_rate": 5.082084977875065e-06, "loss": 0.8369, "step": 262570 }, { "epoch": 4.60998261907688, "grad_norm": 0.051679098540568486, "learning_rate": 5.079326661288132e-06, "loss": 0.8405, "step": 262580 }, { "epoch": 4.610158183956881, "grad_norm": 0.051346225495821574, "learning_rate": 5.076569560034529e-06, "loss": 0.8356, "step": 262590 }, { "epoch": 4.610333748836883, "grad_norm": 0.05518616137037667, "learning_rate": 5.073813674149145e-06, "loss": 0.838, "step": 262600 }, { "epoch": 4.610509313716884, "grad_norm": 0.05915210079156219, "learning_rate": 5.071059003666939e-06, "loss": 0.8396, "step": 262610 }, { "epoch": 4.610684878596885, "grad_norm": 0.0420833242457466, "learning_rate": 5.0683055486228e-06, "loss": 0.8415, "step": 262620 }, { "epoch": 4.610860443476887, "grad_norm": 0.04372711856160739, "learning_rate": 5.065553309051625e-06, "loss": 0.843, "step": 262630 }, { "epoch": 4.611036008356888, "grad_norm": 0.05601461042901163, "learning_rate": 5.062802284988266e-06, "loss": 0.8416, "step": 262640 }, { "epoch": 4.61121157323689, "grad_norm": 0.05920866317005157, "learning_rate": 5.060052476467588e-06, "loss": 0.8424, "step": 262650 }, { "epoch": 4.611387138116891, "grad_norm": 0.05775269603359298, "learning_rate": 5.057303883524442e-06, "loss": 0.848, "step": 262660 }, { "epoch": 4.611562702996893, "grad_norm": 0.04803946052987673, "learning_rate": 5.0545565061936295e-06, "loss": 0.84, "step": 262670 }, { "epoch": 4.611738267876894, "grad_norm": 0.07518814730763504, "learning_rate": 5.051810344509979e-06, "loss": 0.8385, "step": 262680 }, { "epoch": 4.611913832756895, "grad_norm": 0.04255337104053123, "learning_rate": 5.049065398508278e-06, "loss": 0.8382, "step": 262690 }, { "epoch": 4.612089397636897, "grad_norm": 0.05312353697990771, "learning_rate": 5.046321668223293e-06, "loss": 0.8344, "step": 262700 }, { "epoch": 4.612264962516898, "grad_norm": 0.057074562687448795, "learning_rate": 5.0435791536898325e-06, "loss": 0.847, "step": 262710 }, { "epoch": 4.6124405273969, "grad_norm": 0.053746920406146315, "learning_rate": 5.0408378549425964e-06, "loss": 0.8358, "step": 262720 }, { "epoch": 4.612616092276901, "grad_norm": 0.055592490494945246, "learning_rate": 5.038097772016348e-06, "loss": 0.8438, "step": 262730 }, { "epoch": 4.612791657156903, "grad_norm": 0.05097585929062847, "learning_rate": 5.035358904945778e-06, "loss": 0.8288, "step": 262740 }, { "epoch": 4.612967222036904, "grad_norm": 0.04808025830276169, "learning_rate": 5.032621253765595e-06, "loss": 0.843, "step": 262750 }, { "epoch": 4.613142786916905, "grad_norm": 0.05258813890666286, "learning_rate": 5.02988481851052e-06, "loss": 0.835, "step": 262760 }, { "epoch": 4.613318351796907, "grad_norm": 0.0461579203531802, "learning_rate": 5.0271495992151656e-06, "loss": 0.8372, "step": 262770 }, { "epoch": 4.613493916676908, "grad_norm": 0.061252624265144724, "learning_rate": 5.02441559591424e-06, "loss": 0.8457, "step": 262780 }, { "epoch": 4.61366948155691, "grad_norm": 0.04343356785488192, "learning_rate": 5.021682808642357e-06, "loss": 0.8429, "step": 262790 }, { "epoch": 4.613845046436911, "grad_norm": 0.05250631926327239, "learning_rate": 5.018951237434148e-06, "loss": 0.8383, "step": 262800 }, { "epoch": 4.614020611316912, "grad_norm": 0.044954623872610074, "learning_rate": 5.016220882324216e-06, "loss": 0.8349, "step": 262810 }, { "epoch": 4.614196176196914, "grad_norm": 0.05493103409816574, "learning_rate": 5.013491743347168e-06, "loss": 0.8312, "step": 262820 }, { "epoch": 4.614371741076915, "grad_norm": 0.08134513376886711, "learning_rate": 5.0107638205375745e-06, "loss": 0.8346, "step": 262830 }, { "epoch": 4.614547305956917, "grad_norm": 0.04996722540237236, "learning_rate": 5.008037113929992e-06, "loss": 0.8315, "step": 262840 }, { "epoch": 4.614722870836918, "grad_norm": 0.0462794787400445, "learning_rate": 5.005311623558975e-06, "loss": 0.8439, "step": 262850 }, { "epoch": 4.614898435716919, "grad_norm": 0.051777807886414076, "learning_rate": 5.002587349459082e-06, "loss": 0.8332, "step": 262860 }, { "epoch": 4.615074000596921, "grad_norm": 0.05614153719100398, "learning_rate": 4.999864291664768e-06, "loss": 0.831, "step": 262870 }, { "epoch": 4.615249565476922, "grad_norm": 0.056770439184236084, "learning_rate": 4.997142450210602e-06, "loss": 0.8395, "step": 262880 }, { "epoch": 4.615425130356924, "grad_norm": 0.04367263970691313, "learning_rate": 4.994421825131019e-06, "loss": 0.8359, "step": 262890 }, { "epoch": 4.615600695236925, "grad_norm": 0.05575071262856129, "learning_rate": 4.991702416460531e-06, "loss": 0.8418, "step": 262900 }, { "epoch": 4.6157762601169265, "grad_norm": 0.07096368751848811, "learning_rate": 4.988984224233554e-06, "loss": 0.8361, "step": 262910 }, { "epoch": 4.615951824996928, "grad_norm": 0.06272715786380999, "learning_rate": 4.986267248484552e-06, "loss": 0.838, "step": 262920 }, { "epoch": 4.6161273898769295, "grad_norm": 0.04853821974672734, "learning_rate": 4.9835514892479635e-06, "loss": 0.8331, "step": 262930 }, { "epoch": 4.6163029547569305, "grad_norm": 0.057771270333987594, "learning_rate": 4.980836946558146e-06, "loss": 0.8359, "step": 262940 }, { "epoch": 4.616478519636932, "grad_norm": 0.05910407432109971, "learning_rate": 4.978123620449547e-06, "loss": 0.8316, "step": 262950 }, { "epoch": 4.6166540845169335, "grad_norm": 0.04522703855043616, "learning_rate": 4.975411510956523e-06, "loss": 0.8358, "step": 262960 }, { "epoch": 4.6168296493969345, "grad_norm": 0.051396536764259156, "learning_rate": 4.972700618113447e-06, "loss": 0.8482, "step": 262970 }, { "epoch": 4.617005214276936, "grad_norm": 0.09120769729766955, "learning_rate": 4.969990941954641e-06, "loss": 0.8369, "step": 262980 }, { "epoch": 4.6171807791569375, "grad_norm": 0.06346136899703875, "learning_rate": 4.9672824825144536e-06, "loss": 0.8343, "step": 262990 }, { "epoch": 4.6173563440369385, "grad_norm": 0.046718119683393844, "learning_rate": 4.964575239827222e-06, "loss": 0.8359, "step": 263000 }, { "epoch": 4.61753190891694, "grad_norm": 0.05667750561201669, "learning_rate": 4.961869213927205e-06, "loss": 0.8435, "step": 263010 }, { "epoch": 4.6177074737969415, "grad_norm": 0.05167457695056829, "learning_rate": 4.959164404848718e-06, "loss": 0.8398, "step": 263020 }, { "epoch": 4.617883038676943, "grad_norm": 0.05122828874051314, "learning_rate": 4.9564608126260315e-06, "loss": 0.8385, "step": 263030 }, { "epoch": 4.618058603556944, "grad_norm": 0.05694113128788712, "learning_rate": 4.953758437293382e-06, "loss": 0.8369, "step": 263040 }, { "epoch": 4.6182341684369455, "grad_norm": 0.07268963361741519, "learning_rate": 4.951057278885029e-06, "loss": 0.8343, "step": 263050 }, { "epoch": 4.618409733316947, "grad_norm": 0.05257455845356825, "learning_rate": 4.948357337435181e-06, "loss": 0.8342, "step": 263060 }, { "epoch": 4.618585298196948, "grad_norm": 0.05482131991244792, "learning_rate": 4.945658612978059e-06, "loss": 0.8376, "step": 263070 }, { "epoch": 4.61876086307695, "grad_norm": 0.05934558141284393, "learning_rate": 4.942961105547848e-06, "loss": 0.8301, "step": 263080 }, { "epoch": 4.618936427956951, "grad_norm": 0.0619729367028487, "learning_rate": 4.940264815178732e-06, "loss": 0.8452, "step": 263090 }, { "epoch": 4.619111992836953, "grad_norm": 0.04987904305540367, "learning_rate": 4.937569741904871e-06, "loss": 0.8445, "step": 263100 }, { "epoch": 4.619287557716954, "grad_norm": 0.06266614463261098, "learning_rate": 4.934875885760402e-06, "loss": 0.8299, "step": 263110 }, { "epoch": 4.619463122596955, "grad_norm": 0.05294933357515187, "learning_rate": 4.932183246779477e-06, "loss": 0.8421, "step": 263120 }, { "epoch": 4.619638687476957, "grad_norm": 0.045749846321355425, "learning_rate": 4.9294918249962005e-06, "loss": 0.835, "step": 263130 }, { "epoch": 4.619814252356958, "grad_norm": 0.04147609003910252, "learning_rate": 4.926801620444678e-06, "loss": 0.8292, "step": 263140 }, { "epoch": 4.61998981723696, "grad_norm": 0.05442572435130432, "learning_rate": 4.9241126331589826e-06, "loss": 0.8385, "step": 263150 }, { "epoch": 4.620165382116961, "grad_norm": 0.05270573245391319, "learning_rate": 4.921424863173187e-06, "loss": 0.8385, "step": 263160 }, { "epoch": 4.620340946996963, "grad_norm": 0.055721524913208635, "learning_rate": 4.918738310521376e-06, "loss": 0.837, "step": 263170 }, { "epoch": 4.620516511876964, "grad_norm": 0.04628580346369833, "learning_rate": 4.916052975237544e-06, "loss": 0.8387, "step": 263180 }, { "epoch": 4.620692076756965, "grad_norm": 0.048811936017096685, "learning_rate": 4.9133688573557405e-06, "loss": 0.8372, "step": 263190 }, { "epoch": 4.620867641636967, "grad_norm": 0.062153748121667074, "learning_rate": 4.910685956909976e-06, "loss": 0.8389, "step": 263200 }, { "epoch": 4.621043206516968, "grad_norm": 0.05042802192285471, "learning_rate": 4.90800427393421e-06, "loss": 0.8409, "step": 263210 }, { "epoch": 4.62121877139697, "grad_norm": 0.06057469196257636, "learning_rate": 4.905323808462474e-06, "loss": 0.8301, "step": 263220 }, { "epoch": 4.621394336276971, "grad_norm": 0.06739354189975966, "learning_rate": 4.902644560528684e-06, "loss": 0.8443, "step": 263230 }, { "epoch": 4.621569901156972, "grad_norm": 0.0591609275848092, "learning_rate": 4.899966530166827e-06, "loss": 0.832, "step": 263240 }, { "epoch": 4.621745466036974, "grad_norm": 0.05256360618515577, "learning_rate": 4.8972897174107885e-06, "loss": 0.8424, "step": 263250 }, { "epoch": 4.621921030916975, "grad_norm": 0.05703287172557032, "learning_rate": 4.894614122294509e-06, "loss": 0.8377, "step": 263260 }, { "epoch": 4.622096595796977, "grad_norm": 0.05289163300333757, "learning_rate": 4.8919397448519075e-06, "loss": 0.8425, "step": 263270 }, { "epoch": 4.622272160676978, "grad_norm": 0.05701270022284537, "learning_rate": 4.8892665851168255e-06, "loss": 0.8425, "step": 263280 }, { "epoch": 4.622447725556979, "grad_norm": 0.050830293602182976, "learning_rate": 4.8865946431231705e-06, "loss": 0.8323, "step": 263290 }, { "epoch": 4.622623290436981, "grad_norm": 0.058492664003798836, "learning_rate": 4.883923918904763e-06, "loss": 0.8387, "step": 263300 }, { "epoch": 4.622798855316982, "grad_norm": 0.04345870283496826, "learning_rate": 4.881254412495491e-06, "loss": 0.8329, "step": 263310 }, { "epoch": 4.622974420196984, "grad_norm": 0.0446948152263672, "learning_rate": 4.8785861239291154e-06, "loss": 0.8294, "step": 263320 }, { "epoch": 4.623149985076985, "grad_norm": 0.055409064261018555, "learning_rate": 4.875919053239493e-06, "loss": 0.8407, "step": 263330 }, { "epoch": 4.623325549956987, "grad_norm": 0.052357296106880453, "learning_rate": 4.873253200460396e-06, "loss": 0.8402, "step": 263340 }, { "epoch": 4.623501114836988, "grad_norm": 0.038652277936487685, "learning_rate": 4.8705885656256045e-06, "loss": 0.8412, "step": 263350 }, { "epoch": 4.623676679716989, "grad_norm": 0.04766647111306548, "learning_rate": 4.867925148768869e-06, "loss": 0.8391, "step": 263360 }, { "epoch": 4.623852244596991, "grad_norm": 0.047121225934717646, "learning_rate": 4.865262949923957e-06, "loss": 0.8339, "step": 263370 }, { "epoch": 4.624027809476992, "grad_norm": 0.0491039341734633, "learning_rate": 4.862601969124565e-06, "loss": 0.8324, "step": 263380 }, { "epoch": 4.624203374356994, "grad_norm": 0.040658664287921524, "learning_rate": 4.859942206404461e-06, "loss": 0.8428, "step": 263390 }, { "epoch": 4.624378939236995, "grad_norm": 0.05381771804776356, "learning_rate": 4.857283661797298e-06, "loss": 0.8424, "step": 263400 }, { "epoch": 4.624554504116997, "grad_norm": 0.05856686683448303, "learning_rate": 4.854626335336786e-06, "loss": 0.8375, "step": 263410 }, { "epoch": 4.624730068996998, "grad_norm": 0.06411387200453864, "learning_rate": 4.851970227056581e-06, "loss": 0.8355, "step": 263420 }, { "epoch": 4.624905633876999, "grad_norm": 0.04882954916766021, "learning_rate": 4.849315336990327e-06, "loss": 0.8472, "step": 263430 }, { "epoch": 4.625081198757001, "grad_norm": 0.044941394462790524, "learning_rate": 4.8466616651717e-06, "loss": 0.8379, "step": 263440 }, { "epoch": 4.625256763637002, "grad_norm": 0.07158229051450449, "learning_rate": 4.8440092116342695e-06, "loss": 0.8367, "step": 263450 }, { "epoch": 4.625432328517004, "grad_norm": 0.05687229797293317, "learning_rate": 4.841357976411687e-06, "loss": 0.8375, "step": 263460 }, { "epoch": 4.625607893397005, "grad_norm": 0.042359086321344816, "learning_rate": 4.8387079595375226e-06, "loss": 0.8356, "step": 263470 }, { "epoch": 4.625783458277006, "grad_norm": 0.05813180155967756, "learning_rate": 4.836059161045364e-06, "loss": 0.8398, "step": 263480 }, { "epoch": 4.625959023157008, "grad_norm": 0.05782432008685561, "learning_rate": 4.833411580968758e-06, "loss": 0.8373, "step": 263490 }, { "epoch": 4.626134588037009, "grad_norm": 0.052200365765224785, "learning_rate": 4.830765219341258e-06, "loss": 0.8312, "step": 263500 }, { "epoch": 4.626310152917011, "grad_norm": 0.05733073218346035, "learning_rate": 4.8281200761964015e-06, "loss": 0.839, "step": 263510 }, { "epoch": 4.626485717797012, "grad_norm": 0.04978269654501517, "learning_rate": 4.825476151567687e-06, "loss": 0.8388, "step": 263520 }, { "epoch": 4.626661282677013, "grad_norm": 0.05313757520639269, "learning_rate": 4.822833445488628e-06, "loss": 0.8431, "step": 263530 }, { "epoch": 4.626836847557015, "grad_norm": 0.05137794073631298, "learning_rate": 4.8201919579927035e-06, "loss": 0.8395, "step": 263540 }, { "epoch": 4.627012412437016, "grad_norm": 0.06477121149850469, "learning_rate": 4.817551689113359e-06, "loss": 0.8395, "step": 263550 }, { "epoch": 4.627187977317018, "grad_norm": 0.05407319035452907, "learning_rate": 4.814912638884086e-06, "loss": 0.8311, "step": 263560 }, { "epoch": 4.627363542197019, "grad_norm": 0.053140472471945935, "learning_rate": 4.812274807338297e-06, "loss": 0.8336, "step": 263570 }, { "epoch": 4.627539107077021, "grad_norm": 0.05042258001666748, "learning_rate": 4.809638194509438e-06, "loss": 0.8338, "step": 263580 }, { "epoch": 4.627714671957022, "grad_norm": 0.06368165461628876, "learning_rate": 4.80700280043088e-06, "loss": 0.8418, "step": 263590 }, { "epoch": 4.627890236837024, "grad_norm": 0.0860924895162878, "learning_rate": 4.804368625136022e-06, "loss": 0.8377, "step": 263600 }, { "epoch": 4.628065801717025, "grad_norm": 0.06020127438513643, "learning_rate": 4.8017356686582805e-06, "loss": 0.8431, "step": 263610 }, { "epoch": 4.628241366597026, "grad_norm": 0.055716901130259934, "learning_rate": 4.799103931030946e-06, "loss": 0.8349, "step": 263620 }, { "epoch": 4.628416931477028, "grad_norm": 0.06036734573620436, "learning_rate": 4.796473412287432e-06, "loss": 0.8321, "step": 263630 }, { "epoch": 4.628592496357029, "grad_norm": 0.05067152888298102, "learning_rate": 4.793844112461021e-06, "loss": 0.8382, "step": 263640 }, { "epoch": 4.6287680612370306, "grad_norm": 0.0579744521204318, "learning_rate": 4.791216031585059e-06, "loss": 0.8417, "step": 263650 }, { "epoch": 4.628943626117032, "grad_norm": 0.05116158733335425, "learning_rate": 4.788589169692818e-06, "loss": 0.8339, "step": 263660 }, { "epoch": 4.629119190997033, "grad_norm": 0.04185320358728536, "learning_rate": 4.785963526817588e-06, "loss": 0.8429, "step": 263670 }, { "epoch": 4.6292947558770345, "grad_norm": 0.05687132624824908, "learning_rate": 4.783339102992643e-06, "loss": 0.836, "step": 263680 }, { "epoch": 4.629470320757036, "grad_norm": 0.046485638068536374, "learning_rate": 4.780715898251238e-06, "loss": 0.835, "step": 263690 }, { "epoch": 4.6296458856370375, "grad_norm": 0.04669829816552097, "learning_rate": 4.778093912626591e-06, "loss": 0.8381, "step": 263700 }, { "epoch": 4.6298214505170385, "grad_norm": 0.043830703864413, "learning_rate": 4.775473146151939e-06, "loss": 0.8412, "step": 263710 }, { "epoch": 4.62999701539704, "grad_norm": 0.059805785652753206, "learning_rate": 4.7728535988604755e-06, "loss": 0.8366, "step": 263720 }, { "epoch": 4.6301725802770415, "grad_norm": 0.04611958108518331, "learning_rate": 4.7702352707854154e-06, "loss": 0.8325, "step": 263730 }, { "epoch": 4.6303481451570425, "grad_norm": 0.046361568295242586, "learning_rate": 4.767618161959909e-06, "loss": 0.8395, "step": 263740 }, { "epoch": 4.630523710037044, "grad_norm": 0.049824043041016175, "learning_rate": 4.765002272417128e-06, "loss": 0.8316, "step": 263750 }, { "epoch": 4.6306992749170455, "grad_norm": 0.04576663540817222, "learning_rate": 4.76238760219021e-06, "loss": 0.8424, "step": 263760 }, { "epoch": 4.6308748397970465, "grad_norm": 0.05367256957331146, "learning_rate": 4.759774151312271e-06, "loss": 0.8372, "step": 263770 }, { "epoch": 4.631050404677048, "grad_norm": 0.046570342556588346, "learning_rate": 4.757161919816462e-06, "loss": 0.8432, "step": 263780 }, { "epoch": 4.6312259695570495, "grad_norm": 0.047326827383342096, "learning_rate": 4.754550907735832e-06, "loss": 0.8363, "step": 263790 }, { "epoch": 4.631401534437051, "grad_norm": 0.06889011066153468, "learning_rate": 4.751941115103499e-06, "loss": 0.836, "step": 263800 }, { "epoch": 4.631577099317052, "grad_norm": 0.05235640554535179, "learning_rate": 4.749332541952513e-06, "loss": 0.8459, "step": 263810 }, { "epoch": 4.631752664197054, "grad_norm": 0.05378356383435131, "learning_rate": 4.746725188315947e-06, "loss": 0.8287, "step": 263820 }, { "epoch": 4.631928229077055, "grad_norm": 0.04226826069514739, "learning_rate": 4.744119054226806e-06, "loss": 0.8414, "step": 263830 }, { "epoch": 4.632103793957057, "grad_norm": 0.054813398870379025, "learning_rate": 4.7415141397181204e-06, "loss": 0.8332, "step": 263840 }, { "epoch": 4.632279358837058, "grad_norm": 0.05163006131865313, "learning_rate": 4.738910444822906e-06, "loss": 0.8442, "step": 263850 }, { "epoch": 4.632454923717059, "grad_norm": 0.04514845080522203, "learning_rate": 4.736307969574138e-06, "loss": 0.835, "step": 263860 }, { "epoch": 4.632630488597061, "grad_norm": 0.06057420187381858, "learning_rate": 4.7337067140048e-06, "loss": 0.8374, "step": 263870 }, { "epoch": 4.632806053477062, "grad_norm": 0.05646773680468028, "learning_rate": 4.7311066781478566e-06, "loss": 0.8397, "step": 263880 }, { "epoch": 4.632981618357064, "grad_norm": 0.04375916414288567, "learning_rate": 4.728507862036223e-06, "loss": 0.8345, "step": 263890 }, { "epoch": 4.633157183237065, "grad_norm": 0.06931433681885321, "learning_rate": 4.725910265702866e-06, "loss": 0.8357, "step": 263900 }, { "epoch": 4.633332748117066, "grad_norm": 0.06106629883021386, "learning_rate": 4.723313889180655e-06, "loss": 0.8404, "step": 263910 }, { "epoch": 4.633508312997068, "grad_norm": 0.05407512216471056, "learning_rate": 4.720718732502525e-06, "loss": 0.8352, "step": 263920 }, { "epoch": 4.633683877877069, "grad_norm": 0.04802963544510195, "learning_rate": 4.7181247957013365e-06, "loss": 0.8424, "step": 263930 }, { "epoch": 4.633859442757071, "grad_norm": 0.043298928569042286, "learning_rate": 4.7155320788099555e-06, "loss": 0.839, "step": 263940 }, { "epoch": 4.634035007637072, "grad_norm": 0.05369850193889572, "learning_rate": 4.712940581861243e-06, "loss": 0.8464, "step": 263950 }, { "epoch": 4.634210572517073, "grad_norm": 0.07332100773749704, "learning_rate": 4.71035030488801e-06, "loss": 0.8403, "step": 263960 }, { "epoch": 4.634386137397075, "grad_norm": 0.0539247649336384, "learning_rate": 4.707761247923108e-06, "loss": 0.8411, "step": 263970 }, { "epoch": 4.634561702277076, "grad_norm": 0.05588946147346, "learning_rate": 4.705173410999324e-06, "loss": 0.8415, "step": 263980 }, { "epoch": 4.634737267157078, "grad_norm": 0.047453320384465474, "learning_rate": 4.702586794149445e-06, "loss": 0.8365, "step": 263990 }, { "epoch": 4.634912832037079, "grad_norm": 0.054461226212843175, "learning_rate": 4.700001397406246e-06, "loss": 0.8311, "step": 264000 }, { "epoch": 4.635088396917081, "grad_norm": 0.05527298465300057, "learning_rate": 4.697417220802483e-06, "loss": 0.8399, "step": 264010 }, { "epoch": 4.635263961797082, "grad_norm": 0.043396981858347956, "learning_rate": 4.694834264370908e-06, "loss": 0.8374, "step": 264020 }, { "epoch": 4.635439526677083, "grad_norm": 0.053911617901286644, "learning_rate": 4.692252528144231e-06, "loss": 0.842, "step": 264030 }, { "epoch": 4.635615091557085, "grad_norm": 0.06413575983799963, "learning_rate": 4.689672012155172e-06, "loss": 0.832, "step": 264040 }, { "epoch": 4.635790656437086, "grad_norm": 0.05061685479676133, "learning_rate": 4.687092716436442e-06, "loss": 0.8315, "step": 264050 }, { "epoch": 4.635966221317088, "grad_norm": 0.05443206285599027, "learning_rate": 4.684514641020675e-06, "loss": 0.8404, "step": 264060 }, { "epoch": 4.636141786197089, "grad_norm": 0.04888814628604209, "learning_rate": 4.681937785940601e-06, "loss": 0.8357, "step": 264070 }, { "epoch": 4.636317351077091, "grad_norm": 0.04854432052662412, "learning_rate": 4.679362151228808e-06, "loss": 0.844, "step": 264080 }, { "epoch": 4.636492915957092, "grad_norm": 0.05699046634576645, "learning_rate": 4.676787736917976e-06, "loss": 0.8401, "step": 264090 }, { "epoch": 4.636668480837093, "grad_norm": 0.0445616594382379, "learning_rate": 4.674214543040691e-06, "loss": 0.8347, "step": 264100 }, { "epoch": 4.636844045717095, "grad_norm": 0.056954416934572163, "learning_rate": 4.671642569629566e-06, "loss": 0.8408, "step": 264110 }, { "epoch": 4.637019610597096, "grad_norm": 0.052457087878382234, "learning_rate": 4.66907181671721e-06, "loss": 0.8368, "step": 264120 }, { "epoch": 4.637195175477098, "grad_norm": 0.05382374542750904, "learning_rate": 4.666502284336148e-06, "loss": 0.8437, "step": 264130 }, { "epoch": 4.637370740357099, "grad_norm": 0.059237666047893975, "learning_rate": 4.663933972518979e-06, "loss": 0.8375, "step": 264140 }, { "epoch": 4.6375463052371, "grad_norm": 0.06504153986373828, "learning_rate": 4.661366881298225e-06, "loss": 0.8362, "step": 264150 }, { "epoch": 4.637721870117102, "grad_norm": 0.06004505161552498, "learning_rate": 4.658801010706433e-06, "loss": 0.8416, "step": 264160 }, { "epoch": 4.637897434997103, "grad_norm": 0.06547172258951575, "learning_rate": 4.65623636077608e-06, "loss": 0.8342, "step": 264170 }, { "epoch": 4.638072999877105, "grad_norm": 0.04947597606839504, "learning_rate": 4.653672931539669e-06, "loss": 0.8434, "step": 264180 }, { "epoch": 4.638248564757106, "grad_norm": 0.05051210533303105, "learning_rate": 4.65111072302971e-06, "loss": 0.8353, "step": 264190 }, { "epoch": 4.638424129637107, "grad_norm": 0.0529432312542431, "learning_rate": 4.648549735278628e-06, "loss": 0.8388, "step": 264200 }, { "epoch": 4.638599694517109, "grad_norm": 0.05661174988420741, "learning_rate": 4.645989968318881e-06, "loss": 0.8497, "step": 264210 }, { "epoch": 4.63877525939711, "grad_norm": 0.05163364901701673, "learning_rate": 4.6434314221829245e-06, "loss": 0.8357, "step": 264220 }, { "epoch": 4.638950824277112, "grad_norm": 0.06851313790590353, "learning_rate": 4.640874096903138e-06, "loss": 0.8367, "step": 264230 }, { "epoch": 4.639126389157113, "grad_norm": 0.06453176525859473, "learning_rate": 4.638317992511959e-06, "loss": 0.8372, "step": 264240 }, { "epoch": 4.639301954037115, "grad_norm": 0.05374193585135334, "learning_rate": 4.635763109041755e-06, "loss": 0.8278, "step": 264250 }, { "epoch": 4.639477518917116, "grad_norm": 0.06136486703120454, "learning_rate": 4.633209446524907e-06, "loss": 0.8377, "step": 264260 }, { "epoch": 4.639653083797118, "grad_norm": 0.05491593206210102, "learning_rate": 4.630657004993749e-06, "loss": 0.8326, "step": 264270 }, { "epoch": 4.639828648677119, "grad_norm": 0.04946396503472428, "learning_rate": 4.6281057844806425e-06, "loss": 0.8447, "step": 264280 }, { "epoch": 4.64000421355712, "grad_norm": 0.03698858190281376, "learning_rate": 4.62555578501791e-06, "loss": 0.846, "step": 264290 }, { "epoch": 4.640179778437122, "grad_norm": 0.04761279721875644, "learning_rate": 4.6230070066378444e-06, "loss": 0.8386, "step": 264300 }, { "epoch": 4.640355343317123, "grad_norm": 0.05439919460960522, "learning_rate": 4.62045944937276e-06, "loss": 0.8316, "step": 264310 }, { "epoch": 4.640530908197125, "grad_norm": 0.06010205509793542, "learning_rate": 4.617913113254927e-06, "loss": 0.8342, "step": 264320 }, { "epoch": 4.640706473077126, "grad_norm": 0.04912736924635315, "learning_rate": 4.615367998316604e-06, "loss": 0.8366, "step": 264330 }, { "epoch": 4.640882037957127, "grad_norm": 0.04855515342329472, "learning_rate": 4.61282410459004e-06, "loss": 0.84, "step": 264340 }, { "epoch": 4.641057602837129, "grad_norm": 0.05140429306159192, "learning_rate": 4.610281432107462e-06, "loss": 0.8377, "step": 264350 }, { "epoch": 4.64123316771713, "grad_norm": 0.044904946826181825, "learning_rate": 4.607739980901105e-06, "loss": 0.8419, "step": 264360 }, { "epoch": 4.641408732597132, "grad_norm": 0.05248743625410116, "learning_rate": 4.605199751003142e-06, "loss": 0.8394, "step": 264370 }, { "epoch": 4.641584297477133, "grad_norm": 0.06750938117804353, "learning_rate": 4.602660742445776e-06, "loss": 0.8302, "step": 264380 }, { "epoch": 4.641759862357134, "grad_norm": 0.055817008573486586, "learning_rate": 4.60012295526118e-06, "loss": 0.8419, "step": 264390 }, { "epoch": 4.641935427237136, "grad_norm": 0.05833335479797113, "learning_rate": 4.597586389481478e-06, "loss": 0.8394, "step": 264400 }, { "epoch": 4.642110992117137, "grad_norm": 0.0639116823553392, "learning_rate": 4.595051045138857e-06, "loss": 0.8377, "step": 264410 }, { "epoch": 4.6422865569971385, "grad_norm": 0.05495472563710806, "learning_rate": 4.5925169222654074e-06, "loss": 0.8427, "step": 264420 }, { "epoch": 4.64246212187714, "grad_norm": 0.049673957355314574, "learning_rate": 4.589984020893247e-06, "loss": 0.8396, "step": 264430 }, { "epoch": 4.642637686757141, "grad_norm": 0.0528974969585916, "learning_rate": 4.5874523410544705e-06, "loss": 0.8362, "step": 264440 }, { "epoch": 4.6428132516371425, "grad_norm": 0.04728634281305369, "learning_rate": 4.584921882781139e-06, "loss": 0.8351, "step": 264450 }, { "epoch": 4.642988816517144, "grad_norm": 0.059593540551551784, "learning_rate": 4.5823926461053355e-06, "loss": 0.8416, "step": 264460 }, { "epoch": 4.6431643813971455, "grad_norm": 0.06066644470220249, "learning_rate": 4.579864631059077e-06, "loss": 0.8442, "step": 264470 }, { "epoch": 4.6433399462771465, "grad_norm": 0.04789284813042157, "learning_rate": 4.5773378376744256e-06, "loss": 0.8356, "step": 264480 }, { "epoch": 4.6435155111571484, "grad_norm": 0.05366846749261967, "learning_rate": 4.574812265983387e-06, "loss": 0.8422, "step": 264490 }, { "epoch": 4.6436910760371495, "grad_norm": 0.0564483639828134, "learning_rate": 4.572287916017958e-06, "loss": 0.8394, "step": 264500 }, { "epoch": 4.643866640917151, "grad_norm": 0.06266667345718442, "learning_rate": 4.569764787810111e-06, "loss": 0.8405, "step": 264510 }, { "epoch": 4.644042205797152, "grad_norm": 0.049883230122205226, "learning_rate": 4.56724288139183e-06, "loss": 0.8328, "step": 264520 }, { "epoch": 4.6442177706771535, "grad_norm": 0.04784044354695512, "learning_rate": 4.564722196795067e-06, "loss": 0.8322, "step": 264530 }, { "epoch": 4.644393335557155, "grad_norm": 0.053882594353663125, "learning_rate": 4.56220273405174e-06, "loss": 0.8251, "step": 264540 }, { "epoch": 4.644568900437156, "grad_norm": 0.058783904975322356, "learning_rate": 4.559684493193803e-06, "loss": 0.8381, "step": 264550 }, { "epoch": 4.644744465317158, "grad_norm": 0.04566626570343365, "learning_rate": 4.557167474253148e-06, "loss": 0.8389, "step": 264560 }, { "epoch": 4.644920030197159, "grad_norm": 0.059072916742805036, "learning_rate": 4.554651677261642e-06, "loss": 0.8363, "step": 264570 }, { "epoch": 4.64509559507716, "grad_norm": 0.042835601116283976, "learning_rate": 4.552137102251202e-06, "loss": 0.846, "step": 264580 }, { "epoch": 4.645271159957162, "grad_norm": 0.049494970491922335, "learning_rate": 4.549623749253659e-06, "loss": 0.8361, "step": 264590 }, { "epoch": 4.645446724837163, "grad_norm": 0.05260355469612841, "learning_rate": 4.547111618300887e-06, "loss": 0.8354, "step": 264600 }, { "epoch": 4.645622289717165, "grad_norm": 0.05540646271552572, "learning_rate": 4.544600709424673e-06, "loss": 0.8388, "step": 264610 }, { "epoch": 4.645797854597166, "grad_norm": 0.04919611734093772, "learning_rate": 4.542091022656871e-06, "loss": 0.8343, "step": 264620 }, { "epoch": 4.645973419477167, "grad_norm": 0.0635725149924104, "learning_rate": 4.5395825580292525e-06, "loss": 0.8436, "step": 264630 }, { "epoch": 4.646148984357169, "grad_norm": 0.05461728408908432, "learning_rate": 4.537075315573598e-06, "loss": 0.8447, "step": 264640 }, { "epoch": 4.64632454923717, "grad_norm": 0.054644460819380465, "learning_rate": 4.5345692953217114e-06, "loss": 0.8401, "step": 264650 }, { "epoch": 4.646500114117172, "grad_norm": 0.04248622998908917, "learning_rate": 4.532064497305307e-06, "loss": 0.8415, "step": 264660 }, { "epoch": 4.646675678997173, "grad_norm": 0.05538328940121989, "learning_rate": 4.5295609215561345e-06, "loss": 0.8339, "step": 264670 }, { "epoch": 4.646851243877175, "grad_norm": 0.057060999244015906, "learning_rate": 4.527058568105915e-06, "loss": 0.8382, "step": 264680 }, { "epoch": 4.647026808757176, "grad_norm": 0.04465735672643092, "learning_rate": 4.524557436986339e-06, "loss": 0.8366, "step": 264690 }, { "epoch": 4.647202373637177, "grad_norm": 0.04946075945717429, "learning_rate": 4.522057528229136e-06, "loss": 0.8383, "step": 264700 }, { "epoch": 4.647377938517179, "grad_norm": 0.05385920999558381, "learning_rate": 4.5195588418659285e-06, "loss": 0.841, "step": 264710 }, { "epoch": 4.64755350339718, "grad_norm": 0.07435038655326356, "learning_rate": 4.517061377928414e-06, "loss": 0.8323, "step": 264720 }, { "epoch": 4.647729068277182, "grad_norm": 0.050856604315457674, "learning_rate": 4.514565136448226e-06, "loss": 0.8373, "step": 264730 }, { "epoch": 4.647904633157183, "grad_norm": 0.04655821582361148, "learning_rate": 4.5120701174569636e-06, "loss": 0.8424, "step": 264740 }, { "epoch": 4.648080198037185, "grad_norm": 0.048585196260082425, "learning_rate": 4.509576320986295e-06, "loss": 0.8368, "step": 264750 }, { "epoch": 4.648255762917186, "grad_norm": 0.06004243154013982, "learning_rate": 4.507083747067771e-06, "loss": 0.848, "step": 264760 }, { "epoch": 4.648431327797187, "grad_norm": 0.051834450867647715, "learning_rate": 4.504592395733007e-06, "loss": 0.837, "step": 264770 }, { "epoch": 4.648606892677189, "grad_norm": 0.050074053879178335, "learning_rate": 4.502102267013534e-06, "loss": 0.8463, "step": 264780 }, { "epoch": 4.64878245755719, "grad_norm": 0.04440352469306151, "learning_rate": 4.499613360940922e-06, "loss": 0.8318, "step": 264790 }, { "epoch": 4.648958022437192, "grad_norm": 0.045345144798561145, "learning_rate": 4.4971256775467e-06, "loss": 0.8357, "step": 264800 }, { "epoch": 4.649133587317193, "grad_norm": 0.06895106016344742, "learning_rate": 4.494639216862383e-06, "loss": 0.8388, "step": 264810 }, { "epoch": 4.649309152197194, "grad_norm": 0.07512498293195086, "learning_rate": 4.492153978919504e-06, "loss": 0.8402, "step": 264820 }, { "epoch": 4.649484717077196, "grad_norm": 0.057100210185968565, "learning_rate": 4.48966996374951e-06, "loss": 0.8456, "step": 264830 }, { "epoch": 4.649660281957197, "grad_norm": 0.05350265157751752, "learning_rate": 4.487187171383911e-06, "loss": 0.8426, "step": 264840 }, { "epoch": 4.649835846837199, "grad_norm": 0.05349509418493416, "learning_rate": 4.484705601854131e-06, "loss": 0.8361, "step": 264850 }, { "epoch": 4.6500114117172, "grad_norm": 0.07651858417423978, "learning_rate": 4.482225255191639e-06, "loss": 0.848, "step": 264860 }, { "epoch": 4.650186976597201, "grad_norm": 0.05679377477585151, "learning_rate": 4.479746131427848e-06, "loss": 0.8346, "step": 264870 }, { "epoch": 4.650362541477203, "grad_norm": 0.05149358211906644, "learning_rate": 4.4772682305941695e-06, "loss": 0.8382, "step": 264880 }, { "epoch": 4.650538106357204, "grad_norm": 0.05693837541040947, "learning_rate": 4.474791552722006e-06, "loss": 0.8415, "step": 264890 }, { "epoch": 4.650713671237206, "grad_norm": 0.047587807615302256, "learning_rate": 4.472316097842726e-06, "loss": 0.8339, "step": 264900 }, { "epoch": 4.650889236117207, "grad_norm": 0.044833395714368104, "learning_rate": 4.469841865987699e-06, "loss": 0.8389, "step": 264910 }, { "epoch": 4.651064800997209, "grad_norm": 0.04571273076395456, "learning_rate": 4.467368857188284e-06, "loss": 0.8369, "step": 264920 }, { "epoch": 4.65124036587721, "grad_norm": 0.0466372407697736, "learning_rate": 4.464897071475793e-06, "loss": 0.8383, "step": 264930 }, { "epoch": 4.651415930757212, "grad_norm": 0.05677768397981546, "learning_rate": 4.462426508881573e-06, "loss": 0.8392, "step": 264940 }, { "epoch": 4.651591495637213, "grad_norm": 0.05421100304244635, "learning_rate": 4.459957169436907e-06, "loss": 0.8381, "step": 264950 }, { "epoch": 4.651767060517214, "grad_norm": 0.05841488381149386, "learning_rate": 4.4574890531730735e-06, "loss": 0.8475, "step": 264960 }, { "epoch": 4.651942625397216, "grad_norm": 0.05214521062383346, "learning_rate": 4.455022160121377e-06, "loss": 0.8457, "step": 264970 }, { "epoch": 4.652118190277217, "grad_norm": 0.05029615289520486, "learning_rate": 4.452556490313021e-06, "loss": 0.8403, "step": 264980 }, { "epoch": 4.652293755157219, "grad_norm": 0.04781227447441461, "learning_rate": 4.4500920437793084e-06, "loss": 0.8416, "step": 264990 }, { "epoch": 4.65246932003722, "grad_norm": 0.054825189004599914, "learning_rate": 4.447628820551411e-06, "loss": 0.8417, "step": 265000 }, { "epoch": 4.652644884917221, "grad_norm": 0.06403652573769732, "learning_rate": 4.445166820660576e-06, "loss": 0.8304, "step": 265010 }, { "epoch": 4.652820449797223, "grad_norm": 0.04984875271368665, "learning_rate": 4.442706044137974e-06, "loss": 0.8416, "step": 265020 }, { "epoch": 4.652996014677224, "grad_norm": 0.06695252630449153, "learning_rate": 4.440246491014789e-06, "loss": 0.8352, "step": 265030 }, { "epoch": 4.653171579557226, "grad_norm": 0.04842746678489963, "learning_rate": 4.437788161322201e-06, "loss": 0.8326, "step": 265040 }, { "epoch": 4.653347144437227, "grad_norm": 0.06196139396821081, "learning_rate": 4.4353310550913265e-06, "loss": 0.8316, "step": 265050 }, { "epoch": 4.653522709317228, "grad_norm": 0.04622376174012648, "learning_rate": 4.432875172353328e-06, "loss": 0.8482, "step": 265060 }, { "epoch": 4.65369827419723, "grad_norm": 0.041362679145409954, "learning_rate": 4.430420513139307e-06, "loss": 0.84, "step": 265070 }, { "epoch": 4.653873839077231, "grad_norm": 0.04227888058076195, "learning_rate": 4.427967077480348e-06, "loss": 0.8417, "step": 265080 }, { "epoch": 4.654049403957233, "grad_norm": 0.0533132633056673, "learning_rate": 4.425514865407569e-06, "loss": 0.8447, "step": 265090 }, { "epoch": 4.654224968837234, "grad_norm": 0.048646408254322544, "learning_rate": 4.4230638769520175e-06, "loss": 0.8381, "step": 265100 }, { "epoch": 4.654400533717235, "grad_norm": 0.046631669255269376, "learning_rate": 4.4206141121447664e-06, "loss": 0.8413, "step": 265110 }, { "epoch": 4.654576098597237, "grad_norm": 0.04617452873503002, "learning_rate": 4.418165571016833e-06, "loss": 0.8377, "step": 265120 }, { "epoch": 4.654751663477238, "grad_norm": 0.04915997425296721, "learning_rate": 4.415718253599257e-06, "loss": 0.8384, "step": 265130 }, { "epoch": 4.65492722835724, "grad_norm": 0.08156091240670191, "learning_rate": 4.413272159923045e-06, "loss": 0.8369, "step": 265140 }, { "epoch": 4.655102793237241, "grad_norm": 0.049336936273594006, "learning_rate": 4.410827290019169e-06, "loss": 0.8359, "step": 265150 }, { "epoch": 4.6552783581172426, "grad_norm": 0.04871990914190845, "learning_rate": 4.408383643918646e-06, "loss": 0.8503, "step": 265160 }, { "epoch": 4.655453922997244, "grad_norm": 0.0632245658464954, "learning_rate": 4.405941221652396e-06, "loss": 0.8413, "step": 265170 }, { "epoch": 4.6556294878772455, "grad_norm": 0.05314838219795762, "learning_rate": 4.40350002325139e-06, "loss": 0.8464, "step": 265180 }, { "epoch": 4.6558050527572465, "grad_norm": 0.05022588351351373, "learning_rate": 4.401060048746549e-06, "loss": 0.84, "step": 265190 }, { "epoch": 4.655980617637248, "grad_norm": 0.1277967790041647, "learning_rate": 4.398621298168788e-06, "loss": 0.8326, "step": 265200 }, { "epoch": 4.6561561825172495, "grad_norm": 0.044322686984723435, "learning_rate": 4.396183771549016e-06, "loss": 0.8385, "step": 265210 }, { "epoch": 4.6563317473972505, "grad_norm": 0.055315082382266456, "learning_rate": 4.393747468918108e-06, "loss": 0.8433, "step": 265220 }, { "epoch": 4.6565073122772525, "grad_norm": 0.04749595310728598, "learning_rate": 4.3913123903069275e-06, "loss": 0.8407, "step": 265230 }, { "epoch": 4.6566828771572535, "grad_norm": 0.05168849083423946, "learning_rate": 4.388878535746346e-06, "loss": 0.8404, "step": 265240 }, { "epoch": 4.6568584420372545, "grad_norm": 0.04885998566893061, "learning_rate": 4.3864459052671745e-06, "loss": 0.8378, "step": 265250 }, { "epoch": 4.657034006917256, "grad_norm": 0.04426724812734429, "learning_rate": 4.384014498900254e-06, "loss": 0.8432, "step": 265260 }, { "epoch": 4.6572095717972575, "grad_norm": 0.057599586432929215, "learning_rate": 4.381584316676391e-06, "loss": 0.8326, "step": 265270 }, { "epoch": 4.657385136677259, "grad_norm": 0.056141594220397474, "learning_rate": 4.379155358626375e-06, "loss": 0.8386, "step": 265280 }, { "epoch": 4.65756070155726, "grad_norm": 0.06787767810702919, "learning_rate": 4.376727624780969e-06, "loss": 0.8393, "step": 265290 }, { "epoch": 4.6577362664372615, "grad_norm": 0.049002360483161624, "learning_rate": 4.374301115170935e-06, "loss": 0.8359, "step": 265300 }, { "epoch": 4.657911831317263, "grad_norm": 0.04480068238181881, "learning_rate": 4.37187582982704e-06, "loss": 0.8408, "step": 265310 }, { "epoch": 4.658087396197264, "grad_norm": 0.05206722726255003, "learning_rate": 4.369451768779983e-06, "loss": 0.8353, "step": 265320 }, { "epoch": 4.658262961077266, "grad_norm": 0.04854578803874495, "learning_rate": 4.3670289320605055e-06, "loss": 0.8418, "step": 265330 }, { "epoch": 4.658438525957267, "grad_norm": 0.052874573708117906, "learning_rate": 4.364607319699284e-06, "loss": 0.8383, "step": 265340 }, { "epoch": 4.658614090837269, "grad_norm": 0.05361078392526641, "learning_rate": 4.362186931727015e-06, "loss": 0.8427, "step": 265350 }, { "epoch": 4.65878965571727, "grad_norm": 0.052272235565682335, "learning_rate": 4.359767768174344e-06, "loss": 0.8403, "step": 265360 }, { "epoch": 4.658965220597271, "grad_norm": 0.05220970042361028, "learning_rate": 4.357349829071948e-06, "loss": 0.8422, "step": 265370 }, { "epoch": 4.659140785477273, "grad_norm": 0.05952320470460217, "learning_rate": 4.354933114450448e-06, "loss": 0.8378, "step": 265380 }, { "epoch": 4.659316350357274, "grad_norm": 0.06152824052229581, "learning_rate": 4.352517624340465e-06, "loss": 0.8322, "step": 265390 }, { "epoch": 4.659491915237276, "grad_norm": 0.053377477000211115, "learning_rate": 4.350103358772609e-06, "loss": 0.8386, "step": 265400 }, { "epoch": 4.659667480117277, "grad_norm": 0.04744644368136896, "learning_rate": 4.347690317777468e-06, "loss": 0.8412, "step": 265410 }, { "epoch": 4.659843044997279, "grad_norm": 0.06262292955444114, "learning_rate": 4.345278501385601e-06, "loss": 0.8366, "step": 265420 }, { "epoch": 4.66001860987728, "grad_norm": 0.06551791081072775, "learning_rate": 4.342867909627593e-06, "loss": 0.8377, "step": 265430 }, { "epoch": 4.660194174757281, "grad_norm": 0.0449105417253754, "learning_rate": 4.340458542533958e-06, "loss": 0.8395, "step": 265440 }, { "epoch": 4.660369739637283, "grad_norm": 0.05742282971527073, "learning_rate": 4.3380504001352615e-06, "loss": 0.8426, "step": 265450 }, { "epoch": 4.660545304517284, "grad_norm": 0.05553692246552626, "learning_rate": 4.33564348246197e-06, "loss": 0.8385, "step": 265460 }, { "epoch": 4.660720869397286, "grad_norm": 0.05604189290464433, "learning_rate": 4.3332377895446075e-06, "loss": 0.8378, "step": 265470 }, { "epoch": 4.660896434277287, "grad_norm": 0.07381937687519025, "learning_rate": 4.330833321413664e-06, "loss": 0.8374, "step": 265480 }, { "epoch": 4.661071999157288, "grad_norm": 0.05609552083028568, "learning_rate": 4.328430078099562e-06, "loss": 0.8417, "step": 265490 }, { "epoch": 4.66124756403729, "grad_norm": 0.04966165038782332, "learning_rate": 4.326028059632803e-06, "loss": 0.8403, "step": 265500 }, { "epoch": 4.661423128917291, "grad_norm": 0.05405034629802183, "learning_rate": 4.323627266043776e-06, "loss": 0.8396, "step": 265510 }, { "epoch": 4.661598693797293, "grad_norm": 0.06058425518726347, "learning_rate": 4.3212276973629394e-06, "loss": 0.8402, "step": 265520 }, { "epoch": 4.661774258677294, "grad_norm": 0.051116526998914696, "learning_rate": 4.318829353620663e-06, "loss": 0.836, "step": 265530 }, { "epoch": 4.661949823557295, "grad_norm": 0.054169478382998903, "learning_rate": 4.316432234847344e-06, "loss": 0.8337, "step": 265540 }, { "epoch": 4.662125388437297, "grad_norm": 0.04677932456739652, "learning_rate": 4.3140363410733676e-06, "loss": 0.8418, "step": 265550 }, { "epoch": 4.662300953317298, "grad_norm": 0.06596296038891632, "learning_rate": 4.3116416723290764e-06, "loss": 0.8398, "step": 265560 }, { "epoch": 4.6624765181973, "grad_norm": 0.04603541965423012, "learning_rate": 4.3092482286448074e-06, "loss": 0.8346, "step": 265570 }, { "epoch": 4.662652083077301, "grad_norm": 0.052126860655194555, "learning_rate": 4.306856010050908e-06, "loss": 0.8273, "step": 265580 }, { "epoch": 4.662827647957303, "grad_norm": 0.04510801114984144, "learning_rate": 4.304465016577659e-06, "loss": 0.8334, "step": 265590 }, { "epoch": 4.663003212837304, "grad_norm": 0.0444047452734566, "learning_rate": 4.3020752482553845e-06, "loss": 0.8294, "step": 265600 }, { "epoch": 4.663178777717305, "grad_norm": 0.050222164786892474, "learning_rate": 4.2996867051143436e-06, "loss": 0.8336, "step": 265610 }, { "epoch": 4.663354342597307, "grad_norm": 0.044587034969284636, "learning_rate": 4.2972993871848056e-06, "loss": 0.846, "step": 265620 }, { "epoch": 4.663529907477308, "grad_norm": 0.05160506683138448, "learning_rate": 4.294913294497019e-06, "loss": 0.8526, "step": 265630 }, { "epoch": 4.66370547235731, "grad_norm": 0.06845267640501167, "learning_rate": 4.292528427081208e-06, "loss": 0.8398, "step": 265640 }, { "epoch": 4.663881037237311, "grad_norm": 0.047741714995929586, "learning_rate": 4.290144784967601e-06, "loss": 0.842, "step": 265650 }, { "epoch": 4.664056602117313, "grad_norm": 0.04899951087333491, "learning_rate": 4.287762368186378e-06, "loss": 0.8339, "step": 265660 }, { "epoch": 4.664232166997314, "grad_norm": 0.041889489722192905, "learning_rate": 4.285381176767767e-06, "loss": 0.8425, "step": 265670 }, { "epoch": 4.664407731877315, "grad_norm": 0.04432999181290407, "learning_rate": 4.283001210741891e-06, "loss": 0.8446, "step": 265680 }, { "epoch": 4.664583296757317, "grad_norm": 0.05317577189194004, "learning_rate": 4.280622470138946e-06, "loss": 0.8383, "step": 265690 }, { "epoch": 4.664758861637318, "grad_norm": 0.053850042860729315, "learning_rate": 4.278244954989036e-06, "loss": 0.841, "step": 265700 }, { "epoch": 4.66493442651732, "grad_norm": 0.06305143996621833, "learning_rate": 4.2758686653223e-06, "loss": 0.8447, "step": 265710 }, { "epoch": 4.665109991397321, "grad_norm": 0.048097893412389355, "learning_rate": 4.273493601168852e-06, "loss": 0.8435, "step": 265720 }, { "epoch": 4.665285556277322, "grad_norm": 0.05893582780113315, "learning_rate": 4.271119762558763e-06, "loss": 0.8409, "step": 265730 }, { "epoch": 4.665461121157324, "grad_norm": 0.04894072827444121, "learning_rate": 4.2687471495221316e-06, "loss": 0.8393, "step": 265740 }, { "epoch": 4.665636686037325, "grad_norm": 0.06146187261879387, "learning_rate": 4.266375762089025e-06, "loss": 0.8372, "step": 265750 }, { "epoch": 4.665812250917327, "grad_norm": 0.04980956587009297, "learning_rate": 4.264005600289452e-06, "loss": 0.8363, "step": 265760 }, { "epoch": 4.665987815797328, "grad_norm": 0.053836010697510775, "learning_rate": 4.261636664153483e-06, "loss": 0.8392, "step": 265770 }, { "epoch": 4.666163380677329, "grad_norm": 0.08011089157167062, "learning_rate": 4.259268953711114e-06, "loss": 0.8284, "step": 265780 }, { "epoch": 4.666338945557331, "grad_norm": 0.04697104487817675, "learning_rate": 4.256902468992362e-06, "loss": 0.8375, "step": 265790 }, { "epoch": 4.666514510437332, "grad_norm": 0.044195051891115056, "learning_rate": 4.254537210027177e-06, "loss": 0.8336, "step": 265800 }, { "epoch": 4.666690075317334, "grad_norm": 0.03950652300914237, "learning_rate": 4.252173176845555e-06, "loss": 0.8451, "step": 265810 }, { "epoch": 4.666865640197335, "grad_norm": 0.045853023133502695, "learning_rate": 4.249810369477435e-06, "loss": 0.8433, "step": 265820 }, { "epoch": 4.667041205077337, "grad_norm": 0.051341046144287944, "learning_rate": 4.247448787952758e-06, "loss": 0.8372, "step": 265830 }, { "epoch": 4.667216769957338, "grad_norm": 0.0522277858571077, "learning_rate": 4.2450884323014546e-06, "loss": 0.8321, "step": 265840 }, { "epoch": 4.66739233483734, "grad_norm": 0.04512023593226506, "learning_rate": 4.2427293025534274e-06, "loss": 0.8375, "step": 265850 }, { "epoch": 4.667567899717341, "grad_norm": 0.0487335105420395, "learning_rate": 4.240371398738555e-06, "loss": 0.8332, "step": 265860 }, { "epoch": 4.667743464597342, "grad_norm": 0.059233626308728075, "learning_rate": 4.238014720886731e-06, "loss": 0.838, "step": 265870 }, { "epoch": 4.667919029477344, "grad_norm": 0.056216130355998796, "learning_rate": 4.235659269027786e-06, "loss": 0.8393, "step": 265880 }, { "epoch": 4.668094594357345, "grad_norm": 0.04915215729364625, "learning_rate": 4.233305043191606e-06, "loss": 0.8371, "step": 265890 }, { "epoch": 4.668270159237347, "grad_norm": 0.06936758371237513, "learning_rate": 4.2309520434079756e-06, "loss": 0.8353, "step": 265900 }, { "epoch": 4.668445724117348, "grad_norm": 0.06407505506255586, "learning_rate": 4.228600269706738e-06, "loss": 0.8408, "step": 265910 }, { "epoch": 4.668621288997349, "grad_norm": 0.05346090063868166, "learning_rate": 4.226249722117688e-06, "loss": 0.8373, "step": 265920 }, { "epoch": 4.6687968538773506, "grad_norm": 0.046841462324272644, "learning_rate": 4.22390040067058e-06, "loss": 0.8334, "step": 265930 }, { "epoch": 4.668972418757352, "grad_norm": 0.05673824642806295, "learning_rate": 4.2215523053952235e-06, "loss": 0.8455, "step": 265940 }, { "epoch": 4.6691479836373535, "grad_norm": 0.04967566094309692, "learning_rate": 4.2192054363213355e-06, "loss": 0.841, "step": 265950 }, { "epoch": 4.6693235485173545, "grad_norm": 0.05466150611273102, "learning_rate": 4.216859793478672e-06, "loss": 0.8433, "step": 265960 }, { "epoch": 4.669499113397356, "grad_norm": 0.05272064215812018, "learning_rate": 4.214515376896931e-06, "loss": 0.838, "step": 265970 }, { "epoch": 4.6696746782773575, "grad_norm": 0.05598441520952931, "learning_rate": 4.212172186605831e-06, "loss": 0.8317, "step": 265980 }, { "epoch": 4.6698502431573585, "grad_norm": 0.048135629300452856, "learning_rate": 4.209830222635072e-06, "loss": 0.8331, "step": 265990 }, { "epoch": 4.6700258080373604, "grad_norm": 0.05140265360119182, "learning_rate": 4.207489485014285e-06, "loss": 0.8413, "step": 266000 }, { "epoch": 4.6702013729173615, "grad_norm": 0.054729050623292474, "learning_rate": 4.205149973773191e-06, "loss": 0.8342, "step": 266010 }, { "epoch": 4.670376937797363, "grad_norm": 0.07540270959167492, "learning_rate": 4.202811688941379e-06, "loss": 0.8349, "step": 266020 }, { "epoch": 4.670552502677364, "grad_norm": 0.05988521721382386, "learning_rate": 4.200474630548501e-06, "loss": 0.8372, "step": 266030 }, { "epoch": 4.6707280675573655, "grad_norm": 0.05155999015250164, "learning_rate": 4.198138798624147e-06, "loss": 0.833, "step": 266040 }, { "epoch": 4.670903632437367, "grad_norm": 0.0647357911864209, "learning_rate": 4.195804193197939e-06, "loss": 0.841, "step": 266050 }, { "epoch": 4.671079197317368, "grad_norm": 0.04900386200470412, "learning_rate": 4.193470814299452e-06, "loss": 0.8365, "step": 266060 }, { "epoch": 4.67125476219737, "grad_norm": 0.048800972141669126, "learning_rate": 4.191138661958221e-06, "loss": 0.8424, "step": 266070 }, { "epoch": 4.671430327077371, "grad_norm": 0.047505544580597386, "learning_rate": 4.188807736203825e-06, "loss": 0.8469, "step": 266080 }, { "epoch": 4.671605891957373, "grad_norm": 0.05379836658351921, "learning_rate": 4.186478037065804e-06, "loss": 0.8281, "step": 266090 }, { "epoch": 4.671781456837374, "grad_norm": 0.055934955762778085, "learning_rate": 4.1841495645736295e-06, "loss": 0.844, "step": 266100 }, { "epoch": 4.671957021717375, "grad_norm": 0.04861937661394627, "learning_rate": 4.181822318756866e-06, "loss": 0.8365, "step": 266110 }, { "epoch": 4.672132586597377, "grad_norm": 0.046090690684590624, "learning_rate": 4.179496299644947e-06, "loss": 0.8352, "step": 266120 }, { "epoch": 4.672308151477378, "grad_norm": 0.07460998179795268, "learning_rate": 4.177171507267386e-06, "loss": 0.8446, "step": 266130 }, { "epoch": 4.67248371635738, "grad_norm": 0.046808622748815074, "learning_rate": 4.174847941653607e-06, "loss": 0.8368, "step": 266140 }, { "epoch": 4.672659281237381, "grad_norm": 0.0472288063617694, "learning_rate": 4.172525602833051e-06, "loss": 0.8444, "step": 266150 }, { "epoch": 4.672834846117382, "grad_norm": 0.05839807196018264, "learning_rate": 4.170204490835168e-06, "loss": 0.8316, "step": 266160 }, { "epoch": 4.673010410997384, "grad_norm": 0.04926396714143295, "learning_rate": 4.167884605689336e-06, "loss": 0.8354, "step": 266170 }, { "epoch": 4.673185975877385, "grad_norm": 0.05727439023972962, "learning_rate": 4.165565947424979e-06, "loss": 0.8383, "step": 266180 }, { "epoch": 4.673361540757387, "grad_norm": 0.07023896886856934, "learning_rate": 4.163248516071464e-06, "loss": 0.8435, "step": 266190 }, { "epoch": 4.673537105637388, "grad_norm": 0.05239959339981659, "learning_rate": 4.160932311658128e-06, "loss": 0.8341, "step": 266200 }, { "epoch": 4.673712670517389, "grad_norm": 0.04574998346044508, "learning_rate": 4.15861733421435e-06, "loss": 0.8369, "step": 266210 }, { "epoch": 4.673888235397391, "grad_norm": 0.04870733492327791, "learning_rate": 4.1563035837694425e-06, "loss": 0.8438, "step": 266220 }, { "epoch": 4.674063800277392, "grad_norm": 0.04938349264048995, "learning_rate": 4.153991060352743e-06, "loss": 0.8342, "step": 266230 }, { "epoch": 4.674239365157394, "grad_norm": 0.060625654788288665, "learning_rate": 4.151679763993531e-06, "loss": 0.8374, "step": 266240 }, { "epoch": 4.674414930037395, "grad_norm": 0.04282955694759285, "learning_rate": 4.149369694721098e-06, "loss": 0.836, "step": 266250 }, { "epoch": 4.674590494917397, "grad_norm": 0.051519627990433435, "learning_rate": 4.147060852564714e-06, "loss": 0.8405, "step": 266260 }, { "epoch": 4.674766059797398, "grad_norm": 0.04838209057172984, "learning_rate": 4.144753237553616e-06, "loss": 0.8422, "step": 266270 }, { "epoch": 4.674941624677399, "grad_norm": 0.05229888144152544, "learning_rate": 4.142446849717071e-06, "loss": 0.8331, "step": 266280 }, { "epoch": 4.675117189557401, "grad_norm": 0.05653964265342499, "learning_rate": 4.140141689084286e-06, "loss": 0.8349, "step": 266290 }, { "epoch": 4.675292754437402, "grad_norm": 0.06113543505129948, "learning_rate": 4.137837755684474e-06, "loss": 0.8376, "step": 266300 }, { "epoch": 4.675468319317404, "grad_norm": 0.061519716239832746, "learning_rate": 4.1355350495468055e-06, "loss": 0.8399, "step": 266310 }, { "epoch": 4.675643884197405, "grad_norm": 0.047933442384507065, "learning_rate": 4.133233570700474e-06, "loss": 0.8345, "step": 266320 }, { "epoch": 4.675819449077407, "grad_norm": 0.06017502468377595, "learning_rate": 4.13093331917465e-06, "loss": 0.8438, "step": 266330 }, { "epoch": 4.675995013957408, "grad_norm": 0.043836420650104554, "learning_rate": 4.128634294998448e-06, "loss": 0.8337, "step": 266340 }, { "epoch": 4.676170578837409, "grad_norm": 0.0657668555058995, "learning_rate": 4.126336498201041e-06, "loss": 0.834, "step": 266350 }, { "epoch": 4.676346143717411, "grad_norm": 0.04775484234332777, "learning_rate": 4.12403992881151e-06, "loss": 0.8384, "step": 266360 }, { "epoch": 4.676521708597412, "grad_norm": 0.06941045791473106, "learning_rate": 4.121744586858938e-06, "loss": 0.8343, "step": 266370 }, { "epoch": 4.676697273477414, "grad_norm": 0.04926311554912067, "learning_rate": 4.11945047237244e-06, "loss": 0.8321, "step": 266380 }, { "epoch": 4.676872838357415, "grad_norm": 0.04557572705129414, "learning_rate": 4.1171575853810775e-06, "loss": 0.8348, "step": 266390 }, { "epoch": 4.677048403237416, "grad_norm": 0.055111793168453466, "learning_rate": 4.11486592591389e-06, "loss": 0.8413, "step": 266400 }, { "epoch": 4.677223968117418, "grad_norm": 0.04480991477702378, "learning_rate": 4.112575493999927e-06, "loss": 0.8374, "step": 266410 }, { "epoch": 4.677399532997419, "grad_norm": 0.046752001547907505, "learning_rate": 4.110286289668193e-06, "loss": 0.8324, "step": 266420 }, { "epoch": 4.677575097877421, "grad_norm": 0.0553335350173884, "learning_rate": 4.1079983129477055e-06, "loss": 0.8435, "step": 266430 }, { "epoch": 4.677750662757422, "grad_norm": 0.045082253124120106, "learning_rate": 4.105711563867438e-06, "loss": 0.8415, "step": 266440 }, { "epoch": 4.677926227637423, "grad_norm": 0.04584229687853325, "learning_rate": 4.1034260424563845e-06, "loss": 0.838, "step": 266450 }, { "epoch": 4.678101792517425, "grad_norm": 0.051821742830595915, "learning_rate": 4.101141748743486e-06, "loss": 0.8377, "step": 266460 }, { "epoch": 4.678277357397426, "grad_norm": 0.061684019186398376, "learning_rate": 4.098858682757703e-06, "loss": 0.8374, "step": 266470 }, { "epoch": 4.678452922277428, "grad_norm": 0.04881409866333755, "learning_rate": 4.096576844527942e-06, "loss": 0.8355, "step": 266480 }, { "epoch": 4.678628487157429, "grad_norm": 0.048732622439878255, "learning_rate": 4.094296234083122e-06, "loss": 0.8367, "step": 266490 }, { "epoch": 4.678804052037431, "grad_norm": 0.08698005184767361, "learning_rate": 4.092016851452162e-06, "loss": 0.8399, "step": 266500 }, { "epoch": 4.678979616917432, "grad_norm": 0.05843450876619993, "learning_rate": 4.089738696663889e-06, "loss": 0.837, "step": 266510 }, { "epoch": 4.679155181797434, "grad_norm": 0.05048610495455827, "learning_rate": 4.087461769747223e-06, "loss": 0.8336, "step": 266520 }, { "epoch": 4.679330746677435, "grad_norm": 0.049907547188900094, "learning_rate": 4.085186070730982e-06, "loss": 0.842, "step": 266530 }, { "epoch": 4.679506311557436, "grad_norm": 0.05440961668103802, "learning_rate": 4.082911599644007e-06, "loss": 0.8395, "step": 266540 }, { "epoch": 4.679681876437438, "grad_norm": 0.044076052624389205, "learning_rate": 4.080638356515118e-06, "loss": 0.8361, "step": 266550 }, { "epoch": 4.679857441317439, "grad_norm": 0.05704032950178155, "learning_rate": 4.078366341373111e-06, "loss": 0.833, "step": 266560 }, { "epoch": 4.680033006197441, "grad_norm": 0.04470858184419258, "learning_rate": 4.0760955542467835e-06, "loss": 0.8354, "step": 266570 }, { "epoch": 4.680208571077442, "grad_norm": 0.0496052959728219, "learning_rate": 4.073825995164898e-06, "loss": 0.8379, "step": 266580 }, { "epoch": 4.680384135957443, "grad_norm": 0.04528067536477982, "learning_rate": 4.07155766415621e-06, "loss": 0.8379, "step": 266590 }, { "epoch": 4.680559700837445, "grad_norm": 0.05805193151413004, "learning_rate": 4.06929056124947e-06, "loss": 0.8366, "step": 266600 }, { "epoch": 4.680735265717446, "grad_norm": 0.055131614241161725, "learning_rate": 4.067024686473378e-06, "loss": 0.833, "step": 266610 }, { "epoch": 4.680910830597448, "grad_norm": 0.0450563724495724, "learning_rate": 4.064760039856674e-06, "loss": 0.843, "step": 266620 }, { "epoch": 4.681086395477449, "grad_norm": 0.05135533776211319, "learning_rate": 4.062496621428036e-06, "loss": 0.8362, "step": 266630 }, { "epoch": 4.68126196035745, "grad_norm": 0.053458573842434645, "learning_rate": 4.060234431216151e-06, "loss": 0.8445, "step": 266640 }, { "epoch": 4.681437525237452, "grad_norm": 0.04683713077323124, "learning_rate": 4.0579734692496606e-06, "loss": 0.8366, "step": 266650 }, { "epoch": 4.681613090117453, "grad_norm": 0.05561733556361488, "learning_rate": 4.05571373555723e-06, "loss": 0.8422, "step": 266660 }, { "epoch": 4.681788654997455, "grad_norm": 0.04857280803490181, "learning_rate": 4.053455230167493e-06, "loss": 0.8387, "step": 266670 }, { "epoch": 4.681964219877456, "grad_norm": 0.05506541154648383, "learning_rate": 4.051197953109048e-06, "loss": 0.8362, "step": 266680 }, { "epoch": 4.682139784757457, "grad_norm": 0.04183681532906464, "learning_rate": 4.048941904410515e-06, "loss": 0.8315, "step": 266690 }, { "epoch": 4.6823153496374585, "grad_norm": 0.06236881496468625, "learning_rate": 4.046687084100472e-06, "loss": 0.8363, "step": 266700 }, { "epoch": 4.68249091451746, "grad_norm": 0.042018771729010786, "learning_rate": 4.0444334922074745e-06, "loss": 0.8388, "step": 266710 }, { "epoch": 4.6826664793974615, "grad_norm": 0.0543086992509147, "learning_rate": 4.042181128760077e-06, "loss": 0.8324, "step": 266720 }, { "epoch": 4.6828420442774625, "grad_norm": 0.05099226674041593, "learning_rate": 4.039929993786835e-06, "loss": 0.835, "step": 266730 }, { "epoch": 4.6830176091574645, "grad_norm": 0.05349323900765579, "learning_rate": 4.03768008731627e-06, "loss": 0.8298, "step": 266740 }, { "epoch": 4.6831931740374655, "grad_norm": 0.051679155860820135, "learning_rate": 4.035431409376872e-06, "loss": 0.8378, "step": 266750 }, { "epoch": 4.683368738917467, "grad_norm": 0.0626765744693269, "learning_rate": 4.033183959997141e-06, "loss": 0.8406, "step": 266760 }, { "epoch": 4.6835443037974684, "grad_norm": 0.056205238620012074, "learning_rate": 4.030937739205567e-06, "loss": 0.8455, "step": 266770 }, { "epoch": 4.6837198686774695, "grad_norm": 0.058809764338237244, "learning_rate": 4.028692747030572e-06, "loss": 0.8367, "step": 266780 }, { "epoch": 4.683895433557471, "grad_norm": 0.04762997079605067, "learning_rate": 4.026448983500645e-06, "loss": 0.8394, "step": 266790 }, { "epoch": 4.684070998437472, "grad_norm": 0.04856248317318233, "learning_rate": 4.024206448644189e-06, "loss": 0.8384, "step": 266800 }, { "epoch": 4.684246563317474, "grad_norm": 0.054537555573307694, "learning_rate": 4.0219651424896255e-06, "loss": 0.8291, "step": 266810 }, { "epoch": 4.684422128197475, "grad_norm": 0.04937819952416661, "learning_rate": 4.0197250650653355e-06, "loss": 0.8306, "step": 266820 }, { "epoch": 4.684597693077476, "grad_norm": 0.04638174291213993, "learning_rate": 4.017486216399729e-06, "loss": 0.8339, "step": 266830 }, { "epoch": 4.684773257957478, "grad_norm": 0.05248566699816836, "learning_rate": 4.015248596521155e-06, "loss": 0.8415, "step": 266840 }, { "epoch": 4.684948822837479, "grad_norm": 0.05535330679461186, "learning_rate": 4.013012205457957e-06, "loss": 0.8436, "step": 266850 }, { "epoch": 4.685124387717481, "grad_norm": 0.06396312846552213, "learning_rate": 4.010777043238495e-06, "loss": 0.8418, "step": 266860 }, { "epoch": 4.685299952597482, "grad_norm": 0.04511677646578989, "learning_rate": 4.008543109891081e-06, "loss": 0.8323, "step": 266870 }, { "epoch": 4.685475517477483, "grad_norm": 0.0530175021914293, "learning_rate": 4.006310405444005e-06, "loss": 0.8496, "step": 266880 }, { "epoch": 4.685651082357485, "grad_norm": 0.05125673461785178, "learning_rate": 4.004078929925562e-06, "loss": 0.8353, "step": 266890 }, { "epoch": 4.685826647237486, "grad_norm": 0.04794601697423195, "learning_rate": 4.0018486833640286e-06, "loss": 0.8414, "step": 266900 }, { "epoch": 4.686002212117488, "grad_norm": 0.047257772389121974, "learning_rate": 3.9996196657876766e-06, "loss": 0.8362, "step": 266910 }, { "epoch": 4.686177776997489, "grad_norm": 0.04236968302833078, "learning_rate": 3.99739187722472e-06, "loss": 0.8406, "step": 266920 }, { "epoch": 4.686353341877491, "grad_norm": 0.05771800886024948, "learning_rate": 3.995165317703394e-06, "loss": 0.8421, "step": 266930 }, { "epoch": 4.686528906757492, "grad_norm": 0.05368147210072871, "learning_rate": 3.992939987251924e-06, "loss": 0.8322, "step": 266940 }, { "epoch": 4.686704471637493, "grad_norm": 0.050833270593746, "learning_rate": 3.990715885898483e-06, "loss": 0.8343, "step": 266950 }, { "epoch": 4.686880036517495, "grad_norm": 0.057729895305004456, "learning_rate": 3.988493013671284e-06, "loss": 0.8457, "step": 266960 }, { "epoch": 4.687055601397496, "grad_norm": 0.04847196499325078, "learning_rate": 3.986271370598452e-06, "loss": 0.8326, "step": 266970 }, { "epoch": 4.687231166277498, "grad_norm": 0.06949748785995478, "learning_rate": 3.984050956708171e-06, "loss": 0.8437, "step": 266980 }, { "epoch": 4.687406731157499, "grad_norm": 0.04920759622331658, "learning_rate": 3.9818317720285435e-06, "loss": 0.8357, "step": 266990 }, { "epoch": 4.687582296037501, "grad_norm": 0.06976097525782018, "learning_rate": 3.979613816587708e-06, "loss": 0.8395, "step": 267000 }, { "epoch": 4.687757860917502, "grad_norm": 0.06775888860421951, "learning_rate": 3.9773970904137574e-06, "loss": 0.8378, "step": 267010 }, { "epoch": 4.687933425797503, "grad_norm": 0.04325731051919898, "learning_rate": 3.975181593534775e-06, "loss": 0.8416, "step": 267020 }, { "epoch": 4.688108990677505, "grad_norm": 0.05319126073454429, "learning_rate": 3.972967325978843e-06, "loss": 0.845, "step": 267030 }, { "epoch": 4.688284555557506, "grad_norm": 0.05127940772790607, "learning_rate": 3.970754287774021e-06, "loss": 0.8355, "step": 267040 }, { "epoch": 4.688460120437508, "grad_norm": 0.05352201072357986, "learning_rate": 3.968542478948317e-06, "loss": 0.8361, "step": 267050 }, { "epoch": 4.688635685317509, "grad_norm": 0.052326633673514304, "learning_rate": 3.966331899529789e-06, "loss": 0.8396, "step": 267060 }, { "epoch": 4.68881125019751, "grad_norm": 0.04742240410797343, "learning_rate": 3.964122549546423e-06, "loss": 0.8374, "step": 267070 }, { "epoch": 4.688986815077512, "grad_norm": 0.04721589555397845, "learning_rate": 3.9619144290262325e-06, "loss": 0.8357, "step": 267080 }, { "epoch": 4.689162379957513, "grad_norm": 0.05892084909712918, "learning_rate": 3.959707537997171e-06, "loss": 0.8334, "step": 267090 }, { "epoch": 4.689337944837515, "grad_norm": 0.057966807692451036, "learning_rate": 3.95750187648722e-06, "loss": 0.8348, "step": 267100 }, { "epoch": 4.689513509717516, "grad_norm": 0.05946028953595775, "learning_rate": 3.9552974445243215e-06, "loss": 0.8399, "step": 267110 }, { "epoch": 4.689689074597517, "grad_norm": 0.057031723448869316, "learning_rate": 3.95309424213638e-06, "loss": 0.8364, "step": 267120 }, { "epoch": 4.689864639477519, "grad_norm": 0.041991433074674234, "learning_rate": 3.950892269351358e-06, "loss": 0.8316, "step": 267130 }, { "epoch": 4.69004020435752, "grad_norm": 0.05276576293307931, "learning_rate": 3.94869152619712e-06, "loss": 0.8408, "step": 267140 }, { "epoch": 4.690215769237522, "grad_norm": 0.04741873694795131, "learning_rate": 3.946492012701579e-06, "loss": 0.8366, "step": 267150 }, { "epoch": 4.690391334117523, "grad_norm": 0.07640464230386283, "learning_rate": 3.944293728892557e-06, "loss": 0.8377, "step": 267160 }, { "epoch": 4.690566898997525, "grad_norm": 0.05308150167577109, "learning_rate": 3.942096674797951e-06, "loss": 0.8347, "step": 267170 }, { "epoch": 4.690742463877526, "grad_norm": 0.05741710016525087, "learning_rate": 3.939900850445578e-06, "loss": 0.8366, "step": 267180 }, { "epoch": 4.690918028757528, "grad_norm": 0.055490616802752656, "learning_rate": 3.937706255863255e-06, "loss": 0.8394, "step": 267190 }, { "epoch": 4.691093593637529, "grad_norm": 0.04912146513138575, "learning_rate": 3.935512891078805e-06, "loss": 0.8388, "step": 267200 }, { "epoch": 4.69126915851753, "grad_norm": 0.047236394393004484, "learning_rate": 3.933320756120009e-06, "loss": 0.8366, "step": 267210 }, { "epoch": 4.691444723397532, "grad_norm": 0.04895519765991052, "learning_rate": 3.931129851014645e-06, "loss": 0.8321, "step": 267220 }, { "epoch": 4.691620288277533, "grad_norm": 0.04788850832137085, "learning_rate": 3.928940175790454e-06, "loss": 0.8371, "step": 267230 }, { "epoch": 4.691795853157535, "grad_norm": 0.05651568763324595, "learning_rate": 3.926751730475199e-06, "loss": 0.8406, "step": 267240 }, { "epoch": 4.691971418037536, "grad_norm": 0.046580418260578474, "learning_rate": 3.924564515096612e-06, "loss": 0.8344, "step": 267250 }, { "epoch": 4.692146982917537, "grad_norm": 0.04745823837022958, "learning_rate": 3.922378529682391e-06, "loss": 0.8436, "step": 267260 }, { "epoch": 4.692322547797539, "grad_norm": 0.06464798656542116, "learning_rate": 3.920193774260244e-06, "loss": 0.8392, "step": 267270 }, { "epoch": 4.69249811267754, "grad_norm": 0.06433269654296371, "learning_rate": 3.9180102488578465e-06, "loss": 0.8384, "step": 267280 }, { "epoch": 4.692673677557542, "grad_norm": 0.056798808733829184, "learning_rate": 3.915827953502853e-06, "loss": 0.8389, "step": 267290 }, { "epoch": 4.692849242437543, "grad_norm": 0.05051202524819432, "learning_rate": 3.91364688822294e-06, "loss": 0.8348, "step": 267300 }, { "epoch": 4.693024807317544, "grad_norm": 0.04316900497294111, "learning_rate": 3.911467053045716e-06, "loss": 0.8327, "step": 267310 }, { "epoch": 4.693200372197546, "grad_norm": 0.061969204649330305, "learning_rate": 3.909288447998824e-06, "loss": 0.8289, "step": 267320 }, { "epoch": 4.693375937077547, "grad_norm": 0.04632290525239616, "learning_rate": 3.9071110731098535e-06, "loss": 0.8411, "step": 267330 }, { "epoch": 4.693551501957549, "grad_norm": 0.04789655239751201, "learning_rate": 3.90493492840638e-06, "loss": 0.8442, "step": 267340 }, { "epoch": 4.69372706683755, "grad_norm": 0.048759035357047516, "learning_rate": 3.902760013916003e-06, "loss": 0.8399, "step": 267350 }, { "epoch": 4.693902631717551, "grad_norm": 0.04267342851101631, "learning_rate": 3.900586329666245e-06, "loss": 0.8444, "step": 267360 }, { "epoch": 4.694078196597553, "grad_norm": 0.051475865038630456, "learning_rate": 3.898413875684693e-06, "loss": 0.8378, "step": 267370 }, { "epoch": 4.694253761477554, "grad_norm": 0.05013455026967032, "learning_rate": 3.896242651998836e-06, "loss": 0.8346, "step": 267380 }, { "epoch": 4.694429326357556, "grad_norm": 0.048565435057501834, "learning_rate": 3.8940726586361965e-06, "loss": 0.8383, "step": 267390 }, { "epoch": 4.694604891237557, "grad_norm": 0.05542765872236257, "learning_rate": 3.8919038956242635e-06, "loss": 0.8403, "step": 267400 }, { "epoch": 4.694780456117559, "grad_norm": 0.054865968758593245, "learning_rate": 3.889736362990514e-06, "loss": 0.8385, "step": 267410 }, { "epoch": 4.69495602099756, "grad_norm": 0.07115759516580686, "learning_rate": 3.887570060762428e-06, "loss": 0.8421, "step": 267420 }, { "epoch": 4.6951315858775615, "grad_norm": 0.052713372832493506, "learning_rate": 3.885404988967427e-06, "loss": 0.8339, "step": 267430 }, { "epoch": 4.6953071507575626, "grad_norm": 0.046071225661225304, "learning_rate": 3.883241147632967e-06, "loss": 0.8353, "step": 267440 }, { "epoch": 4.695482715637564, "grad_norm": 0.05047572564467077, "learning_rate": 3.88107853678645e-06, "loss": 0.8397, "step": 267450 }, { "epoch": 4.6956582805175655, "grad_norm": 0.04080515202503704, "learning_rate": 3.878917156455276e-06, "loss": 0.8353, "step": 267460 }, { "epoch": 4.6958338453975665, "grad_norm": 0.06101269994428801, "learning_rate": 3.876757006666847e-06, "loss": 0.8363, "step": 267470 }, { "epoch": 4.6960094102775685, "grad_norm": 0.05705948526749106, "learning_rate": 3.8745980874485095e-06, "loss": 0.8349, "step": 267480 }, { "epoch": 4.6961849751575695, "grad_norm": 0.06370512782669564, "learning_rate": 3.872440398827641e-06, "loss": 0.8265, "step": 267490 }, { "epoch": 4.6963605400375705, "grad_norm": 0.052396943057172976, "learning_rate": 3.870283940831557e-06, "loss": 0.8304, "step": 267500 }, { "epoch": 4.6965361049175725, "grad_norm": 0.053428428692608367, "learning_rate": 3.868128713487579e-06, "loss": 0.8399, "step": 267510 }, { "epoch": 4.6967116697975735, "grad_norm": 0.05189453285462499, "learning_rate": 3.8659747168230535e-06, "loss": 0.8426, "step": 267520 }, { "epoch": 4.696887234677575, "grad_norm": 0.05309220565313678, "learning_rate": 3.863821950865208e-06, "loss": 0.8394, "step": 267530 }, { "epoch": 4.697062799557576, "grad_norm": 0.05722076682317009, "learning_rate": 3.8616704156413875e-06, "loss": 0.832, "step": 267540 }, { "epoch": 4.6972383644375775, "grad_norm": 0.04729842804315651, "learning_rate": 3.859520111178816e-06, "loss": 0.8409, "step": 267550 }, { "epoch": 4.697413929317579, "grad_norm": 0.0473312583662443, "learning_rate": 3.85737103750472e-06, "loss": 0.8443, "step": 267560 }, { "epoch": 4.69758949419758, "grad_norm": 0.06483039769884656, "learning_rate": 3.855223194646368e-06, "loss": 0.8291, "step": 267570 }, { "epoch": 4.697765059077582, "grad_norm": 0.052502909753894905, "learning_rate": 3.853076582630942e-06, "loss": 0.8408, "step": 267580 }, { "epoch": 4.697940623957583, "grad_norm": 0.04368234106113046, "learning_rate": 3.8509312014856665e-06, "loss": 0.8475, "step": 267590 }, { "epoch": 4.698116188837585, "grad_norm": 0.05335674473652865, "learning_rate": 3.848787051237712e-06, "loss": 0.836, "step": 267600 }, { "epoch": 4.698291753717586, "grad_norm": 0.06385919307131646, "learning_rate": 3.846644131914239e-06, "loss": 0.8321, "step": 267610 }, { "epoch": 4.698467318597587, "grad_norm": 0.0583129328229909, "learning_rate": 3.844502443542404e-06, "loss": 0.8315, "step": 267620 }, { "epoch": 4.698642883477589, "grad_norm": 0.06440548850230808, "learning_rate": 3.842361986149336e-06, "loss": 0.8414, "step": 267630 }, { "epoch": 4.69881844835759, "grad_norm": 0.07329617597450065, "learning_rate": 3.84022275976217e-06, "loss": 0.8403, "step": 267640 }, { "epoch": 4.698994013237592, "grad_norm": 0.08767050665903607, "learning_rate": 3.838084764408001e-06, "loss": 0.8319, "step": 267650 }, { "epoch": 4.699169578117593, "grad_norm": 0.04160079141717355, "learning_rate": 3.835948000113922e-06, "loss": 0.8357, "step": 267660 }, { "epoch": 4.699345142997595, "grad_norm": 0.047167075279582686, "learning_rate": 3.833812466907005e-06, "loss": 0.8387, "step": 267670 }, { "epoch": 4.699520707877596, "grad_norm": 0.04555878634379115, "learning_rate": 3.831678164814298e-06, "loss": 0.837, "step": 267680 }, { "epoch": 4.699696272757597, "grad_norm": 0.04545577425876813, "learning_rate": 3.829545093862862e-06, "loss": 0.8448, "step": 267690 }, { "epoch": 4.699871837637599, "grad_norm": 0.052463981854190156, "learning_rate": 3.827413254079691e-06, "loss": 0.8374, "step": 267700 }, { "epoch": 4.7000474025176, "grad_norm": 0.055277261384303604, "learning_rate": 3.825282645491835e-06, "loss": 0.8368, "step": 267710 }, { "epoch": 4.700222967397602, "grad_norm": 0.06115689324969741, "learning_rate": 3.823153268126277e-06, "loss": 0.8439, "step": 267720 }, { "epoch": 4.700398532277603, "grad_norm": 0.07261499974606021, "learning_rate": 3.821025122009979e-06, "loss": 0.8315, "step": 267730 }, { "epoch": 4.700574097157604, "grad_norm": 0.05153263786849403, "learning_rate": 3.818898207169911e-06, "loss": 0.8346, "step": 267740 }, { "epoch": 4.700749662037606, "grad_norm": 0.058820816778932764, "learning_rate": 3.8167725236330385e-06, "loss": 0.8443, "step": 267750 }, { "epoch": 4.700925226917607, "grad_norm": 0.058356241007583104, "learning_rate": 3.814648071426288e-06, "loss": 0.8304, "step": 267760 }, { "epoch": 4.701100791797609, "grad_norm": 0.0522849698428773, "learning_rate": 3.8125248505765542e-06, "loss": 0.8451, "step": 267770 }, { "epoch": 4.70127635667761, "grad_norm": 0.04415246472113507, "learning_rate": 3.8104028611107666e-06, "loss": 0.8363, "step": 267780 }, { "epoch": 4.701451921557611, "grad_norm": 0.046035285705998984, "learning_rate": 3.808282103055808e-06, "loss": 0.8374, "step": 267790 }, { "epoch": 4.701627486437613, "grad_norm": 0.050791778836912556, "learning_rate": 3.806162576438532e-06, "loss": 0.842, "step": 267800 }, { "epoch": 4.701803051317614, "grad_norm": 0.04498143166447269, "learning_rate": 3.804044281285801e-06, "loss": 0.8353, "step": 267810 }, { "epoch": 4.701978616197616, "grad_norm": 0.041056842886299634, "learning_rate": 3.8019272176244652e-06, "loss": 0.8353, "step": 267820 }, { "epoch": 4.702154181077617, "grad_norm": 0.05375852760739268, "learning_rate": 3.7998113854813334e-06, "loss": 0.8384, "step": 267830 }, { "epoch": 4.702329745957619, "grad_norm": 0.049126539923791235, "learning_rate": 3.7976967848832227e-06, "loss": 0.839, "step": 267840 }, { "epoch": 4.70250531083762, "grad_norm": 0.06392072175500237, "learning_rate": 3.7955834158569193e-06, "loss": 0.8395, "step": 267850 }, { "epoch": 4.702680875717622, "grad_norm": 0.050641115059739866, "learning_rate": 3.79347127842921e-06, "loss": 0.8488, "step": 267860 }, { "epoch": 4.702856440597623, "grad_norm": 0.05321547667054561, "learning_rate": 3.7913603726268358e-06, "loss": 0.8399, "step": 267870 }, { "epoch": 4.703032005477624, "grad_norm": 0.054352298472155454, "learning_rate": 3.7892506984765695e-06, "loss": 0.8403, "step": 267880 }, { "epoch": 4.703207570357626, "grad_norm": 0.049804500206362935, "learning_rate": 3.7871422560051327e-06, "loss": 0.8413, "step": 267890 }, { "epoch": 4.703383135237627, "grad_norm": 0.05921528545916879, "learning_rate": 3.7850350452392226e-06, "loss": 0.8379, "step": 267900 }, { "epoch": 4.703558700117629, "grad_norm": 0.04869878141737356, "learning_rate": 3.782929066205548e-06, "loss": 0.8451, "step": 267910 }, { "epoch": 4.70373426499763, "grad_norm": 0.06433041933637439, "learning_rate": 3.780824318930795e-06, "loss": 0.8335, "step": 267920 }, { "epoch": 4.703909829877631, "grad_norm": 0.045888515720987684, "learning_rate": 3.7787208034416393e-06, "loss": 0.838, "step": 267930 }, { "epoch": 4.704085394757633, "grad_norm": 0.05230375865738276, "learning_rate": 3.776618519764713e-06, "loss": 0.8414, "step": 267940 }, { "epoch": 4.704260959637634, "grad_norm": 0.041479614712258354, "learning_rate": 3.7745174679266582e-06, "loss": 0.8369, "step": 267950 }, { "epoch": 4.704436524517636, "grad_norm": 0.056199213238029996, "learning_rate": 3.7724176479541075e-06, "loss": 0.8324, "step": 267960 }, { "epoch": 4.704612089397637, "grad_norm": 0.04402875058024408, "learning_rate": 3.770319059873659e-06, "loss": 0.8422, "step": 267970 }, { "epoch": 4.704787654277638, "grad_norm": 0.04948152076759525, "learning_rate": 3.768221703711901e-06, "loss": 0.8419, "step": 267980 }, { "epoch": 4.70496321915764, "grad_norm": 0.050749469838027264, "learning_rate": 3.7661255794954096e-06, "loss": 0.8358, "step": 267990 }, { "epoch": 4.705138784037641, "grad_norm": 0.04818992754758732, "learning_rate": 3.7640306872507496e-06, "loss": 0.8378, "step": 268000 }, { "epoch": 4.705314348917643, "grad_norm": 0.04858907686783499, "learning_rate": 3.7619370270044453e-06, "loss": 0.838, "step": 268010 }, { "epoch": 4.705489913797644, "grad_norm": 0.05026133811011277, "learning_rate": 3.759844598783027e-06, "loss": 0.8388, "step": 268020 }, { "epoch": 4.705665478677645, "grad_norm": 0.04944871286473442, "learning_rate": 3.7577534026130295e-06, "loss": 0.8403, "step": 268030 }, { "epoch": 4.705841043557647, "grad_norm": 0.05132842869688889, "learning_rate": 3.755663438520919e-06, "loss": 0.84, "step": 268040 }, { "epoch": 4.706016608437648, "grad_norm": 0.05836427851759966, "learning_rate": 3.7535747065332054e-06, "loss": 0.8382, "step": 268050 }, { "epoch": 4.70619217331765, "grad_norm": 0.05641268315225926, "learning_rate": 3.7514872066763337e-06, "loss": 0.8402, "step": 268060 }, { "epoch": 4.706367738197651, "grad_norm": 0.04504060241912464, "learning_rate": 3.74940093897675e-06, "loss": 0.8403, "step": 268070 }, { "epoch": 4.706543303077653, "grad_norm": 0.04362603642609433, "learning_rate": 3.7473159034608976e-06, "loss": 0.8339, "step": 268080 }, { "epoch": 4.706718867957654, "grad_norm": 0.046312421192856806, "learning_rate": 3.745232100155179e-06, "loss": 0.8507, "step": 268090 }, { "epoch": 4.706894432837656, "grad_norm": 0.05050376132222978, "learning_rate": 3.743149529086027e-06, "loss": 0.8392, "step": 268100 }, { "epoch": 4.707069997717657, "grad_norm": 0.040868088687944115, "learning_rate": 3.741068190279809e-06, "loss": 0.8335, "step": 268110 }, { "epoch": 4.707245562597658, "grad_norm": 0.048492916702466186, "learning_rate": 3.738988083762883e-06, "loss": 0.8429, "step": 268120 }, { "epoch": 4.70742112747766, "grad_norm": 0.04243669346194222, "learning_rate": 3.736909209561628e-06, "loss": 0.834, "step": 268130 }, { "epoch": 4.707596692357661, "grad_norm": 0.0447825782779746, "learning_rate": 3.734831567702367e-06, "loss": 0.8365, "step": 268140 }, { "epoch": 4.707772257237663, "grad_norm": 0.06077382723983736, "learning_rate": 3.7327551582114358e-06, "loss": 0.8468, "step": 268150 }, { "epoch": 4.707947822117664, "grad_norm": 0.05574266363101695, "learning_rate": 3.7306799811151357e-06, "loss": 0.832, "step": 268160 }, { "epoch": 4.708123386997665, "grad_norm": 0.05018387083351964, "learning_rate": 3.728606036439768e-06, "loss": 0.8408, "step": 268170 }, { "epoch": 4.708298951877667, "grad_norm": 0.047827086955033175, "learning_rate": 3.726533324211592e-06, "loss": 0.8391, "step": 268180 }, { "epoch": 4.708474516757668, "grad_norm": 0.07035560723130747, "learning_rate": 3.7244618444568885e-06, "loss": 0.8409, "step": 268190 }, { "epoch": 4.7086500816376695, "grad_norm": 0.052023283170296905, "learning_rate": 3.7223915972019018e-06, "loss": 0.8345, "step": 268200 }, { "epoch": 4.7088256465176705, "grad_norm": 0.09031249538352809, "learning_rate": 3.7203225824728377e-06, "loss": 0.8368, "step": 268210 }, { "epoch": 4.709001211397672, "grad_norm": 0.0486240387696323, "learning_rate": 3.718254800295952e-06, "loss": 0.8381, "step": 268220 }, { "epoch": 4.7091767762776735, "grad_norm": 0.04055463146321824, "learning_rate": 3.7161882506974165e-06, "loss": 0.8437, "step": 268230 }, { "epoch": 4.7093523411576745, "grad_norm": 0.05013024350979586, "learning_rate": 3.7141229337034107e-06, "loss": 0.8454, "step": 268240 }, { "epoch": 4.7095279060376765, "grad_norm": 0.06472358484529285, "learning_rate": 3.7120588493401165e-06, "loss": 0.8402, "step": 268250 }, { "epoch": 4.7097034709176775, "grad_norm": 0.055675903210994, "learning_rate": 3.709995997633682e-06, "loss": 0.8346, "step": 268260 }, { "epoch": 4.709879035797679, "grad_norm": 0.05816234749183398, "learning_rate": 3.7079343786102547e-06, "loss": 0.841, "step": 268270 }, { "epoch": 4.7100546006776804, "grad_norm": 0.0552588742969346, "learning_rate": 3.705873992295929e-06, "loss": 0.8398, "step": 268280 }, { "epoch": 4.7102301655576815, "grad_norm": 0.050290155211311506, "learning_rate": 3.70381483871683e-06, "loss": 0.8299, "step": 268290 }, { "epoch": 4.710405730437683, "grad_norm": 0.0443909423870249, "learning_rate": 3.7017569178990507e-06, "loss": 0.8352, "step": 268300 }, { "epoch": 4.710581295317684, "grad_norm": 0.05046305017856473, "learning_rate": 3.6997002298686524e-06, "loss": 0.8397, "step": 268310 }, { "epoch": 4.710756860197686, "grad_norm": 0.051336890736669305, "learning_rate": 3.697644774651704e-06, "loss": 0.8401, "step": 268320 }, { "epoch": 4.710932425077687, "grad_norm": 0.05422910338105319, "learning_rate": 3.695590552274235e-06, "loss": 0.8356, "step": 268330 }, { "epoch": 4.711107989957689, "grad_norm": 0.05579917912327898, "learning_rate": 3.693537562762293e-06, "loss": 0.8364, "step": 268340 }, { "epoch": 4.71128355483769, "grad_norm": 0.06522176636176132, "learning_rate": 3.6914858061418743e-06, "loss": 0.8436, "step": 268350 }, { "epoch": 4.711459119717691, "grad_norm": 0.06130806855629285, "learning_rate": 3.6894352824389702e-06, "loss": 0.8365, "step": 268360 }, { "epoch": 4.711634684597693, "grad_norm": 0.04565619025526522, "learning_rate": 3.6873859916795887e-06, "loss": 0.8462, "step": 268370 }, { "epoch": 4.711810249477694, "grad_norm": 0.059040990567443004, "learning_rate": 3.6853379338896556e-06, "loss": 0.8446, "step": 268380 }, { "epoch": 4.711985814357696, "grad_norm": 0.05403848221015146, "learning_rate": 3.6832911090951563e-06, "loss": 0.8363, "step": 268390 }, { "epoch": 4.712161379237697, "grad_norm": 0.05689480997752656, "learning_rate": 3.681245517322017e-06, "loss": 0.8302, "step": 268400 }, { "epoch": 4.712336944117698, "grad_norm": 0.0403867055852201, "learning_rate": 3.679201158596134e-06, "loss": 0.8342, "step": 268410 }, { "epoch": 4.7125125089977, "grad_norm": 0.04902252652945765, "learning_rate": 3.6771580329434257e-06, "loss": 0.8438, "step": 268420 }, { "epoch": 4.712688073877701, "grad_norm": 0.05897045813171033, "learning_rate": 3.675116140389774e-06, "loss": 0.8338, "step": 268430 }, { "epoch": 4.712863638757703, "grad_norm": 0.06588643412872122, "learning_rate": 3.6730754809610648e-06, "loss": 0.8318, "step": 268440 }, { "epoch": 4.713039203637704, "grad_norm": 0.05542095460322553, "learning_rate": 3.671036054683127e-06, "loss": 0.838, "step": 268450 }, { "epoch": 4.713214768517705, "grad_norm": 0.04997675066370838, "learning_rate": 3.6689978615818245e-06, "loss": 0.8299, "step": 268460 }, { "epoch": 4.713390333397707, "grad_norm": 0.11109924894923964, "learning_rate": 3.6669609016829726e-06, "loss": 0.8388, "step": 268470 }, { "epoch": 4.713565898277708, "grad_norm": 0.0386646131723094, "learning_rate": 3.6649251750123694e-06, "loss": 0.8471, "step": 268480 }, { "epoch": 4.71374146315771, "grad_norm": 0.052773827256902765, "learning_rate": 3.662890681595833e-06, "loss": 0.8356, "step": 268490 }, { "epoch": 4.713917028037711, "grad_norm": 0.05065577891515663, "learning_rate": 3.6608574214591164e-06, "loss": 0.844, "step": 268500 }, { "epoch": 4.714092592917713, "grad_norm": 0.05207206632162683, "learning_rate": 3.6588253946279932e-06, "loss": 0.8367, "step": 268510 }, { "epoch": 4.714268157797714, "grad_norm": 0.045798426723981676, "learning_rate": 3.6567946011282163e-06, "loss": 0.8418, "step": 268520 }, { "epoch": 4.714443722677715, "grad_norm": 0.06520524129564617, "learning_rate": 3.654765040985493e-06, "loss": 0.836, "step": 268530 }, { "epoch": 4.714619287557717, "grad_norm": 0.08390155965076258, "learning_rate": 3.6527367142255667e-06, "loss": 0.8404, "step": 268540 }, { "epoch": 4.714794852437718, "grad_norm": 0.06386740217687596, "learning_rate": 3.6507096208741e-06, "loss": 0.8328, "step": 268550 }, { "epoch": 4.71497041731772, "grad_norm": 0.04562902804427964, "learning_rate": 3.648683760956824e-06, "loss": 0.8415, "step": 268560 }, { "epoch": 4.715145982197721, "grad_norm": 0.05449404725548073, "learning_rate": 3.6466591344993818e-06, "loss": 0.8431, "step": 268570 }, { "epoch": 4.715321547077723, "grad_norm": 0.0546248765180241, "learning_rate": 3.644635741527415e-06, "loss": 0.8344, "step": 268580 }, { "epoch": 4.715497111957724, "grad_norm": 0.04856117467364646, "learning_rate": 3.6426135820665666e-06, "loss": 0.8405, "step": 268590 }, { "epoch": 4.715672676837725, "grad_norm": 0.04323841681414147, "learning_rate": 3.640592656142468e-06, "loss": 0.8492, "step": 268600 }, { "epoch": 4.715848241717727, "grad_norm": 0.05092809809426667, "learning_rate": 3.6385729637807285e-06, "loss": 0.8377, "step": 268610 }, { "epoch": 4.716023806597728, "grad_norm": 0.053593703250541164, "learning_rate": 3.6365545050069254e-06, "loss": 0.837, "step": 268620 }, { "epoch": 4.71619937147773, "grad_norm": 0.08103235782942524, "learning_rate": 3.6345372798466225e-06, "loss": 0.844, "step": 268630 }, { "epoch": 4.716374936357731, "grad_norm": 0.056122499315986375, "learning_rate": 3.6325212883254094e-06, "loss": 0.8345, "step": 268640 }, { "epoch": 4.716550501237732, "grad_norm": 0.04874897235925137, "learning_rate": 3.6305065304688065e-06, "loss": 0.8415, "step": 268650 }, { "epoch": 4.716726066117734, "grad_norm": 0.04432560248580024, "learning_rate": 3.628493006302347e-06, "loss": 0.8417, "step": 268660 }, { "epoch": 4.716901630997735, "grad_norm": 0.043925526951845095, "learning_rate": 3.62648071585153e-06, "loss": 0.8446, "step": 268670 }, { "epoch": 4.717077195877737, "grad_norm": 0.04960844672784375, "learning_rate": 3.624469659141888e-06, "loss": 0.8431, "step": 268680 }, { "epoch": 4.717252760757738, "grad_norm": 0.05412862470503659, "learning_rate": 3.622459836198855e-06, "loss": 0.8379, "step": 268690 }, { "epoch": 4.717428325637739, "grad_norm": 0.06133956123760838, "learning_rate": 3.6204512470479304e-06, "loss": 0.8298, "step": 268700 }, { "epoch": 4.717603890517741, "grad_norm": 0.04721072868421729, "learning_rate": 3.6184438917145473e-06, "loss": 0.8401, "step": 268710 }, { "epoch": 4.717779455397742, "grad_norm": 0.059714094076333595, "learning_rate": 3.616437770224141e-06, "loss": 0.8376, "step": 268720 }, { "epoch": 4.717955020277744, "grad_norm": 0.05361939334742361, "learning_rate": 3.6144328826021326e-06, "loss": 0.8395, "step": 268730 }, { "epoch": 4.718130585157745, "grad_norm": 0.052166751594149914, "learning_rate": 3.6124292288739344e-06, "loss": 0.8389, "step": 268740 }, { "epoch": 4.718306150037747, "grad_norm": 0.04545535772040222, "learning_rate": 3.6104268090649034e-06, "loss": 0.838, "step": 268750 }, { "epoch": 4.718481714917748, "grad_norm": 0.05317437653373611, "learning_rate": 3.6084256232004397e-06, "loss": 0.836, "step": 268760 }, { "epoch": 4.71865727979775, "grad_norm": 0.05298638717813309, "learning_rate": 3.6064256713058894e-06, "loss": 0.8388, "step": 268770 }, { "epoch": 4.718832844677751, "grad_norm": 0.05005030641820667, "learning_rate": 3.604426953406587e-06, "loss": 0.8418, "step": 268780 }, { "epoch": 4.719008409557752, "grad_norm": 0.0559413773249924, "learning_rate": 3.6024294695278668e-06, "loss": 0.8369, "step": 268790 }, { "epoch": 4.719183974437754, "grad_norm": 0.05333492398650087, "learning_rate": 3.600433219695021e-06, "loss": 0.8427, "step": 268800 }, { "epoch": 4.719359539317755, "grad_norm": 0.04687568835939078, "learning_rate": 3.5984382039333714e-06, "loss": 0.8376, "step": 268810 }, { "epoch": 4.719535104197757, "grad_norm": 0.04449628826822418, "learning_rate": 3.5964444222681554e-06, "loss": 0.8343, "step": 268820 }, { "epoch": 4.719710669077758, "grad_norm": 0.05099617604266089, "learning_rate": 3.5944518747246636e-06, "loss": 0.8381, "step": 268830 }, { "epoch": 4.719886233957759, "grad_norm": 0.05887111457347818, "learning_rate": 3.5924605613281413e-06, "loss": 0.8393, "step": 268840 }, { "epoch": 4.720061798837761, "grad_norm": 0.04685161243813024, "learning_rate": 3.590470482103804e-06, "loss": 0.8365, "step": 268850 }, { "epoch": 4.720237363717762, "grad_norm": 0.04557980856678719, "learning_rate": 3.5884816370768765e-06, "loss": 0.8371, "step": 268860 }, { "epoch": 4.720412928597764, "grad_norm": 0.04302987046707537, "learning_rate": 3.5864940262725502e-06, "loss": 0.8386, "step": 268870 }, { "epoch": 4.720588493477765, "grad_norm": 0.05388780879320173, "learning_rate": 3.5845076497160277e-06, "loss": 0.8406, "step": 268880 }, { "epoch": 4.720764058357766, "grad_norm": 0.04636115846780166, "learning_rate": 3.5825225074324455e-06, "loss": 0.8424, "step": 268890 }, { "epoch": 4.720939623237768, "grad_norm": 0.05473438435222995, "learning_rate": 3.580538599446985e-06, "loss": 0.836, "step": 268900 }, { "epoch": 4.721115188117769, "grad_norm": 0.08695770468107591, "learning_rate": 3.5785559257847727e-06, "loss": 0.8318, "step": 268910 }, { "epoch": 4.721290752997771, "grad_norm": 0.06932711466030748, "learning_rate": 3.5765744864709225e-06, "loss": 0.8444, "step": 268920 }, { "epoch": 4.721466317877772, "grad_norm": 0.050924218267517606, "learning_rate": 3.574594281530538e-06, "loss": 0.8369, "step": 268930 }, { "epoch": 4.7216418827577735, "grad_norm": 0.1083503914414677, "learning_rate": 3.572615310988725e-06, "loss": 0.838, "step": 268940 }, { "epoch": 4.721817447637775, "grad_norm": 0.05088264901697227, "learning_rate": 3.5706375748705537e-06, "loss": 0.8388, "step": 268950 }, { "epoch": 4.721993012517776, "grad_norm": 0.05911842546522891, "learning_rate": 3.5686610732010606e-06, "loss": 0.8363, "step": 268960 }, { "epoch": 4.7221685773977775, "grad_norm": 0.04365734526738008, "learning_rate": 3.566685806005307e-06, "loss": 0.8373, "step": 268970 }, { "epoch": 4.7223441422777785, "grad_norm": 0.05024924303431537, "learning_rate": 3.564711773308331e-06, "loss": 0.8359, "step": 268980 }, { "epoch": 4.7225197071577805, "grad_norm": 0.04309548834692126, "learning_rate": 3.5627389751351147e-06, "loss": 0.8441, "step": 268990 }, { "epoch": 4.7226952720377815, "grad_norm": 0.05026617082062767, "learning_rate": 3.5607674115106752e-06, "loss": 0.8395, "step": 269000 }, { "epoch": 4.722870836917783, "grad_norm": 0.06282115733973297, "learning_rate": 3.5587970824599833e-06, "loss": 0.8354, "step": 269010 }, { "epoch": 4.7230464017977845, "grad_norm": 0.04561406244055906, "learning_rate": 3.5568279880080225e-06, "loss": 0.8343, "step": 269020 }, { "epoch": 4.7232219666777855, "grad_norm": 0.06219275744358239, "learning_rate": 3.5548601281797114e-06, "loss": 0.8415, "step": 269030 }, { "epoch": 4.723397531557787, "grad_norm": 0.046964502985725735, "learning_rate": 3.5528935029999976e-06, "loss": 0.8345, "step": 269040 }, { "epoch": 4.723573096437788, "grad_norm": 0.06292337348664616, "learning_rate": 3.55092811249381e-06, "loss": 0.8474, "step": 269050 }, { "epoch": 4.72374866131779, "grad_norm": 0.05810839397972513, "learning_rate": 3.5489639566860225e-06, "loss": 0.8388, "step": 269060 }, { "epoch": 4.723924226197791, "grad_norm": 0.042595063789370474, "learning_rate": 3.5470010356015515e-06, "loss": 0.8351, "step": 269070 }, { "epoch": 4.724099791077792, "grad_norm": 0.04447680112629358, "learning_rate": 3.545039349265248e-06, "loss": 0.8425, "step": 269080 }, { "epoch": 4.724275355957794, "grad_norm": 0.047674377811637685, "learning_rate": 3.5430788977019743e-06, "loss": 0.8357, "step": 269090 }, { "epoch": 4.724450920837795, "grad_norm": 0.05404903839945475, "learning_rate": 3.5411196809365708e-06, "loss": 0.846, "step": 269100 }, { "epoch": 4.724626485717797, "grad_norm": 0.06950413256772534, "learning_rate": 3.539161698993855e-06, "loss": 0.8354, "step": 269110 }, { "epoch": 4.724802050597798, "grad_norm": 0.04889943578408305, "learning_rate": 3.5372049518986457e-06, "loss": 0.8279, "step": 269120 }, { "epoch": 4.724977615477799, "grad_norm": 0.0480542436559821, "learning_rate": 3.535249439675717e-06, "loss": 0.842, "step": 269130 }, { "epoch": 4.725153180357801, "grad_norm": 0.040292265126041206, "learning_rate": 3.5332951623498646e-06, "loss": 0.8496, "step": 269140 }, { "epoch": 4.725328745237802, "grad_norm": 0.04880649381763071, "learning_rate": 3.5313421199458406e-06, "loss": 0.8433, "step": 269150 }, { "epoch": 4.725504310117804, "grad_norm": 0.05156150889033575, "learning_rate": 3.529390312488387e-06, "loss": 0.8395, "step": 269160 }, { "epoch": 4.725679874997805, "grad_norm": 0.047700728537964254, "learning_rate": 3.5274397400022455e-06, "loss": 0.8435, "step": 269170 }, { "epoch": 4.725855439877807, "grad_norm": 0.06715096942247298, "learning_rate": 3.525490402512111e-06, "loss": 0.8513, "step": 269180 }, { "epoch": 4.726031004757808, "grad_norm": 0.04104618342561526, "learning_rate": 3.5235423000427162e-06, "loss": 0.8362, "step": 269190 }, { "epoch": 4.726206569637809, "grad_norm": 0.054945337014982935, "learning_rate": 3.521595432618702e-06, "loss": 0.8407, "step": 269200 }, { "epoch": 4.726382134517811, "grad_norm": 0.04773776804990481, "learning_rate": 3.519649800264766e-06, "loss": 0.8348, "step": 269210 }, { "epoch": 4.726557699397812, "grad_norm": 0.04407269312451477, "learning_rate": 3.5177054030055506e-06, "loss": 0.8325, "step": 269220 }, { "epoch": 4.726733264277814, "grad_norm": 0.05597698368461701, "learning_rate": 3.5157622408656652e-06, "loss": 0.8393, "step": 269230 }, { "epoch": 4.726908829157815, "grad_norm": 0.05744742235048056, "learning_rate": 3.513820313869784e-06, "loss": 0.8371, "step": 269240 }, { "epoch": 4.727084394037817, "grad_norm": 0.04913781021358177, "learning_rate": 3.5118796220424742e-06, "loss": 0.8357, "step": 269250 }, { "epoch": 4.727259958917818, "grad_norm": 0.05038304050284769, "learning_rate": 3.5099401654083325e-06, "loss": 0.8428, "step": 269260 }, { "epoch": 4.727435523797819, "grad_norm": 0.06456451208723778, "learning_rate": 3.5080019439919362e-06, "loss": 0.8364, "step": 269270 }, { "epoch": 4.727611088677821, "grad_norm": 0.07261181225181651, "learning_rate": 3.5060649578178285e-06, "loss": 0.8376, "step": 269280 }, { "epoch": 4.727786653557822, "grad_norm": 0.04507683695476735, "learning_rate": 3.5041292069105747e-06, "loss": 0.8448, "step": 269290 }, { "epoch": 4.727962218437824, "grad_norm": 0.06541665504492263, "learning_rate": 3.5021946912946845e-06, "loss": 0.8266, "step": 269300 }, { "epoch": 4.728137783317825, "grad_norm": 0.058600724569097304, "learning_rate": 3.5002614109946693e-06, "loss": 0.8407, "step": 269310 }, { "epoch": 4.728313348197826, "grad_norm": 0.05292174167642983, "learning_rate": 3.4983293660350393e-06, "loss": 0.8403, "step": 269320 }, { "epoch": 4.728488913077828, "grad_norm": 0.06268306109018133, "learning_rate": 3.4963985564402487e-06, "loss": 0.8393, "step": 269330 }, { "epoch": 4.728664477957829, "grad_norm": 0.06134896965182258, "learning_rate": 3.4944689822347777e-06, "loss": 0.8356, "step": 269340 }, { "epoch": 4.728840042837831, "grad_norm": 0.054885534902796684, "learning_rate": 3.4925406434430674e-06, "loss": 0.8334, "step": 269350 }, { "epoch": 4.729015607717832, "grad_norm": 0.048584653554051574, "learning_rate": 3.490613540089564e-06, "loss": 0.8393, "step": 269360 }, { "epoch": 4.729191172597833, "grad_norm": 0.057768663868710254, "learning_rate": 3.4886876721986685e-06, "loss": 0.8431, "step": 269370 }, { "epoch": 4.729366737477835, "grad_norm": 0.046642804022550874, "learning_rate": 3.4867630397947914e-06, "loss": 0.8345, "step": 269380 }, { "epoch": 4.729542302357836, "grad_norm": 0.04052729191247539, "learning_rate": 3.4848396429023227e-06, "loss": 0.8465, "step": 269390 }, { "epoch": 4.729717867237838, "grad_norm": 0.0619121356996399, "learning_rate": 3.4829174815456077e-06, "loss": 0.8394, "step": 269400 }, { "epoch": 4.729893432117839, "grad_norm": 0.05610915667103477, "learning_rate": 3.480996555749025e-06, "loss": 0.8308, "step": 269410 }, { "epoch": 4.730068996997841, "grad_norm": 0.04593302920879009, "learning_rate": 3.479076865536908e-06, "loss": 0.8317, "step": 269420 }, { "epoch": 4.730244561877842, "grad_norm": 0.05633501317087239, "learning_rate": 3.477158410933582e-06, "loss": 0.8427, "step": 269430 }, { "epoch": 4.730420126757844, "grad_norm": 0.04704188402219465, "learning_rate": 3.4752411919633367e-06, "loss": 0.8376, "step": 269440 }, { "epoch": 4.730595691637845, "grad_norm": 0.058105486766959424, "learning_rate": 3.473325208650474e-06, "loss": 0.8365, "step": 269450 }, { "epoch": 4.730771256517846, "grad_norm": 0.05554448229249022, "learning_rate": 3.4714104610192853e-06, "loss": 0.8374, "step": 269460 }, { "epoch": 4.730946821397848, "grad_norm": 0.04350490422520714, "learning_rate": 3.4694969490940146e-06, "loss": 0.8451, "step": 269470 }, { "epoch": 4.731122386277849, "grad_norm": 0.09033786576875727, "learning_rate": 3.4675846728989e-06, "loss": 0.8303, "step": 269480 }, { "epoch": 4.731297951157851, "grad_norm": 0.06429272534682902, "learning_rate": 3.465673632458187e-06, "loss": 0.837, "step": 269490 }, { "epoch": 4.731473516037852, "grad_norm": 0.043894884781348266, "learning_rate": 3.463763827796078e-06, "loss": 0.8372, "step": 269500 }, { "epoch": 4.731649080917853, "grad_norm": 0.07330127344480698, "learning_rate": 3.461855258936776e-06, "loss": 0.8436, "step": 269510 }, { "epoch": 4.731824645797855, "grad_norm": 0.05948061335630108, "learning_rate": 3.4599479259044624e-06, "loss": 0.8411, "step": 269520 }, { "epoch": 4.732000210677856, "grad_norm": 0.04035450662529167, "learning_rate": 3.458041828723306e-06, "loss": 0.8416, "step": 269530 }, { "epoch": 4.732175775557858, "grad_norm": 0.05287651085333652, "learning_rate": 3.4561369674174434e-06, "loss": 0.8337, "step": 269540 }, { "epoch": 4.732351340437859, "grad_norm": 0.04213063850500086, "learning_rate": 3.4542333420110236e-06, "loss": 0.8375, "step": 269550 }, { "epoch": 4.73252690531786, "grad_norm": 0.046697410556363186, "learning_rate": 3.4523309525281704e-06, "loss": 0.8366, "step": 269560 }, { "epoch": 4.732702470197862, "grad_norm": 0.04926070673824322, "learning_rate": 3.4504297989929555e-06, "loss": 0.846, "step": 269570 }, { "epoch": 4.732878035077863, "grad_norm": 0.05368254963441094, "learning_rate": 3.448529881429516e-06, "loss": 0.8359, "step": 269580 }, { "epoch": 4.733053599957865, "grad_norm": 0.048665063336206835, "learning_rate": 3.446631199861888e-06, "loss": 0.8429, "step": 269590 }, { "epoch": 4.733229164837866, "grad_norm": 0.0529307168854671, "learning_rate": 3.4447337543141443e-06, "loss": 0.8397, "step": 269600 }, { "epoch": 4.733404729717867, "grad_norm": 0.05058404717534522, "learning_rate": 3.442837544810311e-06, "loss": 0.838, "step": 269610 }, { "epoch": 4.733580294597869, "grad_norm": 0.07772687331494625, "learning_rate": 3.4409425713744253e-06, "loss": 0.8333, "step": 269620 }, { "epoch": 4.73375585947787, "grad_norm": 0.04234404413806608, "learning_rate": 3.4390488340304927e-06, "loss": 0.8356, "step": 269630 }, { "epoch": 4.733931424357872, "grad_norm": 0.06404034287910282, "learning_rate": 3.437156332802518e-06, "loss": 0.8341, "step": 269640 }, { "epoch": 4.734106989237873, "grad_norm": 0.05839491460251181, "learning_rate": 3.4352650677144613e-06, "loss": 0.8445, "step": 269650 }, { "epoch": 4.734282554117875, "grad_norm": 0.03892606185574389, "learning_rate": 3.4333750387903054e-06, "loss": 0.8408, "step": 269660 }, { "epoch": 4.734458118997876, "grad_norm": 0.04713257478855645, "learning_rate": 3.431486246053968e-06, "loss": 0.8456, "step": 269670 }, { "epoch": 4.7346336838778775, "grad_norm": 0.06585172825441309, "learning_rate": 3.42959868952941e-06, "loss": 0.8351, "step": 269680 }, { "epoch": 4.734809248757879, "grad_norm": 0.05936409023889388, "learning_rate": 3.4277123692405363e-06, "loss": 0.832, "step": 269690 }, { "epoch": 4.73498481363788, "grad_norm": 0.052184318264816294, "learning_rate": 3.4258272852112425e-06, "loss": 0.842, "step": 269700 }, { "epoch": 4.7351603785178815, "grad_norm": 0.054481570397606836, "learning_rate": 3.4239434374654233e-06, "loss": 0.8312, "step": 269710 }, { "epoch": 4.7353359433978826, "grad_norm": 0.047576752968868645, "learning_rate": 3.4220608260269295e-06, "loss": 0.8357, "step": 269720 }, { "epoch": 4.7355115082778845, "grad_norm": 0.048991907386912864, "learning_rate": 3.420179450919645e-06, "loss": 0.8317, "step": 269730 }, { "epoch": 4.7356870731578855, "grad_norm": 0.04564526755012522, "learning_rate": 3.418299312167366e-06, "loss": 0.836, "step": 269740 }, { "epoch": 4.7358626380378865, "grad_norm": 0.056696439591082615, "learning_rate": 3.4164204097939543e-06, "loss": 0.8333, "step": 269750 }, { "epoch": 4.7360382029178885, "grad_norm": 0.054347137495963614, "learning_rate": 3.4145427438231943e-06, "loss": 0.8455, "step": 269760 }, { "epoch": 4.7362137677978895, "grad_norm": 0.08086928723077988, "learning_rate": 3.412666314278873e-06, "loss": 0.847, "step": 269770 }, { "epoch": 4.736389332677891, "grad_norm": 0.06403669676698799, "learning_rate": 3.4107911211847725e-06, "loss": 0.8423, "step": 269780 }, { "epoch": 4.7365648975578925, "grad_norm": 0.05864112859358978, "learning_rate": 3.408917164564658e-06, "loss": 0.8362, "step": 269790 }, { "epoch": 4.7367404624378935, "grad_norm": 0.04743468806774644, "learning_rate": 3.4070444444422584e-06, "loss": 0.8359, "step": 269800 }, { "epoch": 4.736916027317895, "grad_norm": 0.04452380574709382, "learning_rate": 3.4051729608413152e-06, "loss": 0.84, "step": 269810 }, { "epoch": 4.737091592197896, "grad_norm": 0.04695557089998945, "learning_rate": 3.403302713785536e-06, "loss": 0.844, "step": 269820 }, { "epoch": 4.737267157077898, "grad_norm": 0.05285343501918645, "learning_rate": 3.401433703298607e-06, "loss": 0.84, "step": 269830 }, { "epoch": 4.737442721957899, "grad_norm": 0.06115927283171212, "learning_rate": 3.3995659294042253e-06, "loss": 0.8371, "step": 269840 }, { "epoch": 4.737618286837901, "grad_norm": 0.05387899016999972, "learning_rate": 3.397699392126045e-06, "loss": 0.8403, "step": 269850 }, { "epoch": 4.737793851717902, "grad_norm": 0.04682105079652921, "learning_rate": 3.395834091487717e-06, "loss": 0.84, "step": 269860 }, { "epoch": 4.737969416597903, "grad_norm": 0.05180520754531565, "learning_rate": 3.3939700275128855e-06, "loss": 0.8368, "step": 269870 }, { "epoch": 4.738144981477905, "grad_norm": 0.04817191731868438, "learning_rate": 3.3921072002251477e-06, "loss": 0.8351, "step": 269880 }, { "epoch": 4.738320546357906, "grad_norm": 0.04868056540424003, "learning_rate": 3.3902456096481224e-06, "loss": 0.8404, "step": 269890 }, { "epoch": 4.738496111237908, "grad_norm": 0.05329617074551716, "learning_rate": 3.3883852558054e-06, "loss": 0.8318, "step": 269900 }, { "epoch": 4.738671676117909, "grad_norm": 0.05024651740091856, "learning_rate": 3.3865261387205314e-06, "loss": 0.8403, "step": 269910 }, { "epoch": 4.738847240997911, "grad_norm": 0.04628069948887886, "learning_rate": 3.3846682584170945e-06, "loss": 0.8419, "step": 269920 }, { "epoch": 4.739022805877912, "grad_norm": 0.04754536325785593, "learning_rate": 3.3828116149186212e-06, "loss": 0.8348, "step": 269930 }, { "epoch": 4.739198370757913, "grad_norm": 0.0504123447451637, "learning_rate": 3.3809562082486214e-06, "loss": 0.8305, "step": 269940 }, { "epoch": 4.739373935637915, "grad_norm": 0.04336624477960971, "learning_rate": 3.3791020384306163e-06, "loss": 0.8468, "step": 269950 }, { "epoch": 4.739549500517916, "grad_norm": 0.05272605369849274, "learning_rate": 3.3772491054881046e-06, "loss": 0.8318, "step": 269960 }, { "epoch": 4.739725065397918, "grad_norm": 0.04818259982933336, "learning_rate": 3.3753974094445544e-06, "loss": 0.8343, "step": 269970 }, { "epoch": 4.739900630277919, "grad_norm": 0.05307661307458954, "learning_rate": 3.37354695032343e-06, "loss": 0.8356, "step": 269980 }, { "epoch": 4.74007619515792, "grad_norm": 0.04797990388724096, "learning_rate": 3.3716977281481656e-06, "loss": 0.8389, "step": 269990 }, { "epoch": 4.740251760037922, "grad_norm": 0.045578064110531884, "learning_rate": 3.369849742942216e-06, "loss": 0.8442, "step": 270000 }, { "epoch": 4.740427324917923, "grad_norm": 0.06106701546868061, "learning_rate": 3.3680029947289708e-06, "loss": 0.843, "step": 270010 }, { "epoch": 4.740602889797925, "grad_norm": 0.051875651410074226, "learning_rate": 3.3661574835318415e-06, "loss": 0.8402, "step": 270020 }, { "epoch": 4.740778454677926, "grad_norm": 0.06168508521636349, "learning_rate": 3.364313209374205e-06, "loss": 0.836, "step": 270030 }, { "epoch": 4.740954019557927, "grad_norm": 0.05106223989197634, "learning_rate": 3.3624701722794305e-06, "loss": 0.8415, "step": 270040 }, { "epoch": 4.741129584437929, "grad_norm": 0.05129241486571749, "learning_rate": 3.3606283722708727e-06, "loss": 0.8363, "step": 270050 }, { "epoch": 4.74130514931793, "grad_norm": 0.06002905708372772, "learning_rate": 3.3587878093718674e-06, "loss": 0.8403, "step": 270060 }, { "epoch": 4.741480714197932, "grad_norm": 0.046288988501109934, "learning_rate": 3.3569484836057486e-06, "loss": 0.8425, "step": 270070 }, { "epoch": 4.741656279077933, "grad_norm": 0.09949551666996594, "learning_rate": 3.3551103949957724e-06, "loss": 0.8456, "step": 270080 }, { "epoch": 4.741831843957935, "grad_norm": 0.04433764247175743, "learning_rate": 3.3532735435652967e-06, "loss": 0.843, "step": 270090 }, { "epoch": 4.742007408837936, "grad_norm": 0.0512682538494922, "learning_rate": 3.351437929337545e-06, "loss": 0.8448, "step": 270100 }, { "epoch": 4.742182973717938, "grad_norm": 0.06002899844307116, "learning_rate": 3.3496035523357857e-06, "loss": 0.8384, "step": 270110 }, { "epoch": 4.742358538597939, "grad_norm": 0.04659568886838669, "learning_rate": 3.3477704125832552e-06, "loss": 0.8382, "step": 270120 }, { "epoch": 4.74253410347794, "grad_norm": 0.05258074437133813, "learning_rate": 3.3459385101031984e-06, "loss": 0.8387, "step": 270130 }, { "epoch": 4.742709668357942, "grad_norm": 0.05973101442031826, "learning_rate": 3.3441078449188185e-06, "loss": 0.8498, "step": 270140 }, { "epoch": 4.742885233237943, "grad_norm": 0.060622078124196156, "learning_rate": 3.342278417053308e-06, "loss": 0.8446, "step": 270150 }, { "epoch": 4.743060798117945, "grad_norm": 0.04523119719923329, "learning_rate": 3.3404502265298455e-06, "loss": 0.8376, "step": 270160 }, { "epoch": 4.743236362997946, "grad_norm": 0.041703477329352076, "learning_rate": 3.3386232733715905e-06, "loss": 0.8437, "step": 270170 }, { "epoch": 4.743411927877947, "grad_norm": 0.04936135184178244, "learning_rate": 3.3367975576017008e-06, "loss": 0.8307, "step": 270180 }, { "epoch": 4.743587492757949, "grad_norm": 0.05337973641459756, "learning_rate": 3.334973079243292e-06, "loss": 0.8399, "step": 270190 }, { "epoch": 4.74376305763795, "grad_norm": 0.04650477352865164, "learning_rate": 3.333149838319499e-06, "loss": 0.8339, "step": 270200 }, { "epoch": 4.743938622517952, "grad_norm": 0.04681623076168162, "learning_rate": 3.3313278348534273e-06, "loss": 0.8378, "step": 270210 }, { "epoch": 4.744114187397953, "grad_norm": 0.058882833674960416, "learning_rate": 3.3295070688681354e-06, "loss": 0.8408, "step": 270220 }, { "epoch": 4.744289752277954, "grad_norm": 0.054821604788888, "learning_rate": 3.327687540386705e-06, "loss": 0.839, "step": 270230 }, { "epoch": 4.744465317157956, "grad_norm": 0.06155126147987241, "learning_rate": 3.3258692494322065e-06, "loss": 0.8444, "step": 270240 }, { "epoch": 4.744640882037957, "grad_norm": 0.048239057229242883, "learning_rate": 3.3240521960276436e-06, "loss": 0.839, "step": 270250 }, { "epoch": 4.744816446917959, "grad_norm": 0.0555163726989012, "learning_rate": 3.3222363801960775e-06, "loss": 0.8379, "step": 270260 }, { "epoch": 4.74499201179796, "grad_norm": 0.05537660310329743, "learning_rate": 3.32042180196049e-06, "loss": 0.8376, "step": 270270 }, { "epoch": 4.745167576677961, "grad_norm": 0.057296672000888074, "learning_rate": 3.318608461343875e-06, "loss": 0.8381, "step": 270280 }, { "epoch": 4.745343141557963, "grad_norm": 0.043314535226188305, "learning_rate": 3.3167963583692148e-06, "loss": 0.8376, "step": 270290 }, { "epoch": 4.745518706437964, "grad_norm": 0.048847542111684156, "learning_rate": 3.31498549305947e-06, "loss": 0.8379, "step": 270300 }, { "epoch": 4.745694271317966, "grad_norm": 0.05141517850584879, "learning_rate": 3.313175865437579e-06, "loss": 0.8316, "step": 270310 }, { "epoch": 4.745869836197967, "grad_norm": 0.10218970380104432, "learning_rate": 3.31136747552646e-06, "loss": 0.849, "step": 270320 }, { "epoch": 4.746045401077969, "grad_norm": 0.07058957721385145, "learning_rate": 3.3095603233490494e-06, "loss": 0.8354, "step": 270330 }, { "epoch": 4.74622096595797, "grad_norm": 0.05203770393631048, "learning_rate": 3.3077544089282326e-06, "loss": 0.8416, "step": 270340 }, { "epoch": 4.746396530837972, "grad_norm": 0.05857776532731572, "learning_rate": 3.3059497322868815e-06, "loss": 0.835, "step": 270350 }, { "epoch": 4.746572095717973, "grad_norm": 0.04939546912427993, "learning_rate": 3.304146293447869e-06, "loss": 0.8361, "step": 270360 }, { "epoch": 4.746747660597974, "grad_norm": 0.06209039872546478, "learning_rate": 3.302344092434046e-06, "loss": 0.8405, "step": 270370 }, { "epoch": 4.746923225477976, "grad_norm": 0.051016435403898705, "learning_rate": 3.300543129268252e-06, "loss": 0.8364, "step": 270380 }, { "epoch": 4.747098790357977, "grad_norm": 0.06416328594709414, "learning_rate": 3.2987434039733047e-06, "loss": 0.8313, "step": 270390 }, { "epoch": 4.747274355237979, "grad_norm": 0.056149340235371284, "learning_rate": 3.2969449165719895e-06, "loss": 0.8416, "step": 270400 }, { "epoch": 4.74744992011798, "grad_norm": 0.0434353308858254, "learning_rate": 3.2951476670871235e-06, "loss": 0.8382, "step": 270410 }, { "epoch": 4.747625484997981, "grad_norm": 0.04866245250521003, "learning_rate": 3.2933516555414478e-06, "loss": 0.84, "step": 270420 }, { "epoch": 4.747801049877983, "grad_norm": 0.0571301710683115, "learning_rate": 3.291556881957747e-06, "loss": 0.8392, "step": 270430 }, { "epoch": 4.747976614757984, "grad_norm": 0.06537099344083497, "learning_rate": 3.289763346358739e-06, "loss": 0.8463, "step": 270440 }, { "epoch": 4.7481521796379855, "grad_norm": 0.05600803178631314, "learning_rate": 3.2879710487671557e-06, "loss": 0.8434, "step": 270450 }, { "epoch": 4.748327744517987, "grad_norm": 0.041087127703461165, "learning_rate": 3.286179989205692e-06, "loss": 0.8392, "step": 270460 }, { "epoch": 4.748503309397988, "grad_norm": 0.050434258680089224, "learning_rate": 3.2843901676970686e-06, "loss": 0.8373, "step": 270470 }, { "epoch": 4.7486788742779895, "grad_norm": 0.03716427768718725, "learning_rate": 3.2826015842639473e-06, "loss": 0.8468, "step": 270480 }, { "epoch": 4.7488544391579905, "grad_norm": 0.07201850458231546, "learning_rate": 3.2808142389289933e-06, "loss": 0.8387, "step": 270490 }, { "epoch": 4.7490300040379925, "grad_norm": 0.04294022736899864, "learning_rate": 3.279028131714848e-06, "loss": 0.8344, "step": 270500 }, { "epoch": 4.7492055689179935, "grad_norm": 0.0499524347791299, "learning_rate": 3.2772432626441535e-06, "loss": 0.834, "step": 270510 }, { "epoch": 4.749381133797995, "grad_norm": 0.05138282244571196, "learning_rate": 3.275459631739497e-06, "loss": 0.8383, "step": 270520 }, { "epoch": 4.7495566986779965, "grad_norm": 0.062100899140127654, "learning_rate": 3.273677239023509e-06, "loss": 0.8285, "step": 270530 }, { "epoch": 4.7497322635579975, "grad_norm": 0.058864676898113456, "learning_rate": 3.2718960845187553e-06, "loss": 0.8366, "step": 270540 }, { "epoch": 4.749907828437999, "grad_norm": 0.04894084416522389, "learning_rate": 3.2701161682478097e-06, "loss": 0.8429, "step": 270550 }, { "epoch": 4.7500833933180004, "grad_norm": 0.049139879353998425, "learning_rate": 3.268337490233217e-06, "loss": 0.8337, "step": 270560 }, { "epoch": 4.750258958198002, "grad_norm": 0.05708602351318422, "learning_rate": 3.2665600504975204e-06, "loss": 0.8325, "step": 270570 }, { "epoch": 4.750434523078003, "grad_norm": 0.04543547329410919, "learning_rate": 3.2647838490632397e-06, "loss": 0.8431, "step": 270580 }, { "epoch": 4.750610087958005, "grad_norm": 0.050039816193705074, "learning_rate": 3.2630088859528742e-06, "loss": 0.8359, "step": 270590 }, { "epoch": 4.750785652838006, "grad_norm": 0.053921826445360414, "learning_rate": 3.2612351611889233e-06, "loss": 0.8302, "step": 270600 }, { "epoch": 4.750961217718007, "grad_norm": 0.05228139559028975, "learning_rate": 3.2594626747938636e-06, "loss": 0.8448, "step": 270610 }, { "epoch": 4.751136782598009, "grad_norm": 0.048721128323644794, "learning_rate": 3.2576914267901286e-06, "loss": 0.8364, "step": 270620 }, { "epoch": 4.75131234747801, "grad_norm": 0.060704679337216925, "learning_rate": 3.255921417200185e-06, "loss": 0.8399, "step": 270630 }, { "epoch": 4.751487912358012, "grad_norm": 0.0446050423632679, "learning_rate": 3.254152646046442e-06, "loss": 0.8276, "step": 270640 }, { "epoch": 4.751663477238013, "grad_norm": 0.04647659806227319, "learning_rate": 3.2523851133513222e-06, "loss": 0.8432, "step": 270650 }, { "epoch": 4.751839042118014, "grad_norm": 0.053158569749119075, "learning_rate": 3.250618819137216e-06, "loss": 0.8543, "step": 270660 }, { "epoch": 4.752014606998016, "grad_norm": 0.04771647345033033, "learning_rate": 3.2488537634265e-06, "loss": 0.8337, "step": 270670 }, { "epoch": 4.752190171878017, "grad_norm": 0.0497789915833952, "learning_rate": 3.2470899462415424e-06, "loss": 0.8342, "step": 270680 }, { "epoch": 4.752365736758019, "grad_norm": 0.06969055934262718, "learning_rate": 3.245327367604688e-06, "loss": 0.831, "step": 270690 }, { "epoch": 4.75254130163802, "grad_norm": 0.04546738628059773, "learning_rate": 3.243566027538271e-06, "loss": 0.8374, "step": 270700 }, { "epoch": 4.752716866518021, "grad_norm": 0.06954260522572539, "learning_rate": 3.2418059260646025e-06, "loss": 0.8289, "step": 270710 }, { "epoch": 4.752892431398023, "grad_norm": 0.04883983841209657, "learning_rate": 3.2400470632059963e-06, "loss": 0.8374, "step": 270720 }, { "epoch": 4.753067996278024, "grad_norm": 0.05021226494281023, "learning_rate": 3.2382894389847304e-06, "loss": 0.8341, "step": 270730 }, { "epoch": 4.753243561158026, "grad_norm": 0.045622006661777126, "learning_rate": 3.2365330534230622e-06, "loss": 0.8354, "step": 270740 }, { "epoch": 4.753419126038027, "grad_norm": 0.05056057530822757, "learning_rate": 3.2347779065432717e-06, "loss": 0.8356, "step": 270750 }, { "epoch": 4.753594690918029, "grad_norm": 0.04871653113835665, "learning_rate": 3.233023998367571e-06, "loss": 0.8363, "step": 270760 }, { "epoch": 4.75377025579803, "grad_norm": 0.05897388101177949, "learning_rate": 3.2312713289181965e-06, "loss": 0.8485, "step": 270770 }, { "epoch": 4.753945820678032, "grad_norm": 0.050169921289439555, "learning_rate": 3.22951989821736e-06, "loss": 0.8419, "step": 270780 }, { "epoch": 4.754121385558033, "grad_norm": 0.055169919075575495, "learning_rate": 3.2277697062872313e-06, "loss": 0.8389, "step": 270790 }, { "epoch": 4.754296950438034, "grad_norm": 0.053570005521496974, "learning_rate": 3.2260207531500027e-06, "loss": 0.8458, "step": 270800 }, { "epoch": 4.754472515318036, "grad_norm": 0.04711782157778146, "learning_rate": 3.22427303882782e-06, "loss": 0.8433, "step": 270810 }, { "epoch": 4.754648080198037, "grad_norm": 0.049641366656153915, "learning_rate": 3.2225265633428535e-06, "loss": 0.8344, "step": 270820 }, { "epoch": 4.754823645078039, "grad_norm": 0.041519545558750835, "learning_rate": 3.220781326717206e-06, "loss": 0.8459, "step": 270830 }, { "epoch": 4.75499920995804, "grad_norm": 0.04739768527858636, "learning_rate": 3.2190373289730027e-06, "loss": 0.8444, "step": 270840 }, { "epoch": 4.755174774838041, "grad_norm": 0.05375754441511358, "learning_rate": 3.2172945701323363e-06, "loss": 0.8374, "step": 270850 }, { "epoch": 4.755350339718043, "grad_norm": 0.05376255607129604, "learning_rate": 3.215553050217277e-06, "loss": 0.8429, "step": 270860 }, { "epoch": 4.755525904598044, "grad_norm": 0.051695366041319524, "learning_rate": 3.213812769249906e-06, "loss": 0.8287, "step": 270870 }, { "epoch": 4.755701469478046, "grad_norm": 0.051838615157157146, "learning_rate": 3.2120737272522725e-06, "loss": 0.8298, "step": 270880 }, { "epoch": 4.755877034358047, "grad_norm": 0.04604301694625906, "learning_rate": 3.210335924246402e-06, "loss": 0.8423, "step": 270890 }, { "epoch": 4.756052599238048, "grad_norm": 0.04896197045001434, "learning_rate": 3.2085993602543202e-06, "loss": 0.8425, "step": 270900 }, { "epoch": 4.75622816411805, "grad_norm": 0.060778376759268206, "learning_rate": 3.2068640352980224e-06, "loss": 0.8366, "step": 270910 }, { "epoch": 4.756403728998051, "grad_norm": 0.06530358639316576, "learning_rate": 3.2051299493995003e-06, "loss": 0.8422, "step": 270920 }, { "epoch": 4.756579293878053, "grad_norm": 0.046333850275674986, "learning_rate": 3.2033971025807156e-06, "loss": 0.8331, "step": 270930 }, { "epoch": 4.756754858758054, "grad_norm": 0.05125117632559562, "learning_rate": 3.20166549486365e-06, "loss": 0.8428, "step": 270940 }, { "epoch": 4.756930423638055, "grad_norm": 0.0479131506170267, "learning_rate": 3.199935126270219e-06, "loss": 0.8362, "step": 270950 }, { "epoch": 4.757105988518057, "grad_norm": 0.06272170175082509, "learning_rate": 3.1982059968223524e-06, "loss": 0.8422, "step": 270960 }, { "epoch": 4.757281553398058, "grad_norm": 0.04934470200118367, "learning_rate": 3.1964781065419543e-06, "loss": 0.8396, "step": 270970 }, { "epoch": 4.75745711827806, "grad_norm": 0.04699288048044237, "learning_rate": 3.1947514554509185e-06, "loss": 0.8407, "step": 270980 }, { "epoch": 4.757632683158061, "grad_norm": 0.04750256753439146, "learning_rate": 3.1930260435711415e-06, "loss": 0.8405, "step": 270990 }, { "epoch": 4.757808248038063, "grad_norm": 0.0495094851224902, "learning_rate": 3.191301870924461e-06, "loss": 0.8382, "step": 271000 }, { "epoch": 4.757983812918064, "grad_norm": 0.05066369249900216, "learning_rate": 3.189578937532728e-06, "loss": 0.8442, "step": 271010 }, { "epoch": 4.758159377798066, "grad_norm": 0.042314152541360735, "learning_rate": 3.187857243417783e-06, "loss": 0.8382, "step": 271020 }, { "epoch": 4.758334942678067, "grad_norm": 0.049957018262588866, "learning_rate": 3.1861367886014196e-06, "loss": 0.8397, "step": 271030 }, { "epoch": 4.758510507558068, "grad_norm": 0.061857376265605474, "learning_rate": 3.184417573105457e-06, "loss": 0.8413, "step": 271040 }, { "epoch": 4.75868607243807, "grad_norm": 0.04350201672718212, "learning_rate": 3.182699596951669e-06, "loss": 0.8429, "step": 271050 }, { "epoch": 4.758861637318071, "grad_norm": 0.04828922331798451, "learning_rate": 3.1809828601618267e-06, "loss": 0.8274, "step": 271060 }, { "epoch": 4.759037202198073, "grad_norm": 0.04849686441288214, "learning_rate": 3.179267362757674e-06, "loss": 0.8424, "step": 271070 }, { "epoch": 4.759212767078074, "grad_norm": 0.06507825180000111, "learning_rate": 3.17755310476095e-06, "loss": 0.8375, "step": 271080 }, { "epoch": 4.759388331958075, "grad_norm": 0.05611490141302464, "learning_rate": 3.1758400861933836e-06, "loss": 0.8368, "step": 271090 }, { "epoch": 4.759563896838077, "grad_norm": 0.09122036002679286, "learning_rate": 3.1741283070766613e-06, "loss": 0.8342, "step": 271100 }, { "epoch": 4.759739461718078, "grad_norm": 0.08158851241790381, "learning_rate": 3.172417767432491e-06, "loss": 0.8379, "step": 271110 }, { "epoch": 4.75991502659808, "grad_norm": 0.05057759435715119, "learning_rate": 3.1707084672825358e-06, "loss": 0.844, "step": 271120 }, { "epoch": 4.760090591478081, "grad_norm": 0.04216180640753766, "learning_rate": 3.169000406648448e-06, "loss": 0.8396, "step": 271130 }, { "epoch": 4.760266156358082, "grad_norm": 0.04799174505604849, "learning_rate": 3.1672935855518814e-06, "loss": 0.839, "step": 271140 }, { "epoch": 4.760441721238084, "grad_norm": 0.044978553785657016, "learning_rate": 3.1655880040144444e-06, "loss": 0.843, "step": 271150 }, { "epoch": 4.760617286118085, "grad_norm": 0.07036420030013818, "learning_rate": 3.1638836620577665e-06, "loss": 0.8301, "step": 271160 }, { "epoch": 4.760792850998087, "grad_norm": 0.051508520919681014, "learning_rate": 3.162180559703425e-06, "loss": 0.8394, "step": 271170 }, { "epoch": 4.760968415878088, "grad_norm": 0.04616043876704121, "learning_rate": 3.1604786969730057e-06, "loss": 0.8351, "step": 271180 }, { "epoch": 4.7611439807580895, "grad_norm": 0.051002071924451275, "learning_rate": 3.158778073888086e-06, "loss": 0.8369, "step": 271190 }, { "epoch": 4.761319545638091, "grad_norm": 0.06320399686469767, "learning_rate": 3.157078690470183e-06, "loss": 0.8337, "step": 271200 }, { "epoch": 4.761495110518092, "grad_norm": 0.04910370676636998, "learning_rate": 3.1553805467408437e-06, "loss": 0.8376, "step": 271210 }, { "epoch": 4.7616706753980935, "grad_norm": 0.04402193894850012, "learning_rate": 3.1536836427215973e-06, "loss": 0.8375, "step": 271220 }, { "epoch": 4.7618462402780946, "grad_norm": 0.04511918416141858, "learning_rate": 3.1519879784339323e-06, "loss": 0.839, "step": 271230 }, { "epoch": 4.7620218051580965, "grad_norm": 0.04707698243762427, "learning_rate": 3.1502935538993147e-06, "loss": 0.8371, "step": 271240 }, { "epoch": 4.7621973700380975, "grad_norm": 0.061667066849666226, "learning_rate": 3.148600369139243e-06, "loss": 0.8393, "step": 271250 }, { "epoch": 4.762372934918099, "grad_norm": 0.06631474812330244, "learning_rate": 3.146908424175161e-06, "loss": 0.8332, "step": 271260 }, { "epoch": 4.7625484997981005, "grad_norm": 0.04894601317988778, "learning_rate": 3.1452177190284904e-06, "loss": 0.8308, "step": 271270 }, { "epoch": 4.7627240646781015, "grad_norm": 0.05075335331824453, "learning_rate": 3.1435282537206763e-06, "loss": 0.8431, "step": 271280 }, { "epoch": 4.762899629558103, "grad_norm": 0.04925546984764888, "learning_rate": 3.1418400282731174e-06, "loss": 0.8387, "step": 271290 }, { "epoch": 4.7630751944381045, "grad_norm": 0.06719340091583753, "learning_rate": 3.1401530427071918e-06, "loss": 0.8433, "step": 271300 }, { "epoch": 4.763250759318106, "grad_norm": 0.053641034017115494, "learning_rate": 3.1384672970442773e-06, "loss": 0.8364, "step": 271310 }, { "epoch": 4.763426324198107, "grad_norm": 0.05575639559834005, "learning_rate": 3.136782791305741e-06, "loss": 0.8356, "step": 271320 }, { "epoch": 4.763601889078108, "grad_norm": 0.056755631433561035, "learning_rate": 3.1350995255129273e-06, "loss": 0.8378, "step": 271330 }, { "epoch": 4.76377745395811, "grad_norm": 0.04556838681533919, "learning_rate": 3.1334174996871495e-06, "loss": 0.8414, "step": 271340 }, { "epoch": 4.763953018838111, "grad_norm": 0.07207157919685801, "learning_rate": 3.131736713849729e-06, "loss": 0.838, "step": 271350 }, { "epoch": 4.764128583718113, "grad_norm": 0.04286800940632896, "learning_rate": 3.130057168021979e-06, "loss": 0.8375, "step": 271360 }, { "epoch": 4.764304148598114, "grad_norm": 0.05331369675319611, "learning_rate": 3.1283788622251445e-06, "loss": 0.8423, "step": 271370 }, { "epoch": 4.764479713478115, "grad_norm": 0.05582712171674589, "learning_rate": 3.126701796480505e-06, "loss": 0.8375, "step": 271380 }, { "epoch": 4.764655278358117, "grad_norm": 0.04673141408576182, "learning_rate": 3.1250259708093066e-06, "loss": 0.8345, "step": 271390 }, { "epoch": 4.764830843238118, "grad_norm": 0.04614707878326132, "learning_rate": 3.1233513852328063e-06, "loss": 0.8419, "step": 271400 }, { "epoch": 4.76500640811812, "grad_norm": 0.05822677403639449, "learning_rate": 3.121678039772184e-06, "loss": 0.8355, "step": 271410 }, { "epoch": 4.765181972998121, "grad_norm": 0.04334761099375932, "learning_rate": 3.120005934448664e-06, "loss": 0.8409, "step": 271420 }, { "epoch": 4.765357537878123, "grad_norm": 0.0508257119019743, "learning_rate": 3.1183350692834358e-06, "loss": 0.8355, "step": 271430 }, { "epoch": 4.765533102758124, "grad_norm": 0.05079439966410048, "learning_rate": 3.116665444297649e-06, "loss": 0.8433, "step": 271440 }, { "epoch": 4.765708667638125, "grad_norm": 0.046292704470039955, "learning_rate": 3.114997059512482e-06, "loss": 0.8388, "step": 271450 }, { "epoch": 4.765884232518127, "grad_norm": 0.06148577121011536, "learning_rate": 3.11332991494906e-06, "loss": 0.8324, "step": 271460 }, { "epoch": 4.766059797398128, "grad_norm": 0.06236468299087744, "learning_rate": 3.1116640106284975e-06, "loss": 0.8433, "step": 271470 }, { "epoch": 4.76623536227813, "grad_norm": 0.048007557788704386, "learning_rate": 3.10999934657192e-06, "loss": 0.8299, "step": 271480 }, { "epoch": 4.766410927158131, "grad_norm": 0.05553435295644221, "learning_rate": 3.1083359228004076e-06, "loss": 0.8353, "step": 271490 }, { "epoch": 4.766586492038133, "grad_norm": 0.047181886976405615, "learning_rate": 3.106673739335053e-06, "loss": 0.8483, "step": 271500 }, { "epoch": 4.766762056918134, "grad_norm": 0.048184010531405606, "learning_rate": 3.105012796196883e-06, "loss": 0.839, "step": 271510 }, { "epoch": 4.766937621798135, "grad_norm": 0.06323833432354246, "learning_rate": 3.1033530934069775e-06, "loss": 0.8321, "step": 271520 }, { "epoch": 4.767113186678137, "grad_norm": 0.12148247422670377, "learning_rate": 3.1016946309863416e-06, "loss": 0.8343, "step": 271530 }, { "epoch": 4.767288751558138, "grad_norm": 0.054820959925492914, "learning_rate": 3.100037408956001e-06, "loss": 0.8372, "step": 271540 }, { "epoch": 4.76746431643814, "grad_norm": 0.07325980196775131, "learning_rate": 3.098381427336949e-06, "loss": 0.8422, "step": 271550 }, { "epoch": 4.767639881318141, "grad_norm": 0.0677934668803157, "learning_rate": 3.096726686150157e-06, "loss": 0.8359, "step": 271560 }, { "epoch": 4.767815446198142, "grad_norm": 0.058458925191270286, "learning_rate": 3.095073185416618e-06, "loss": 0.8332, "step": 271570 }, { "epoch": 4.767991011078144, "grad_norm": 0.052360144381423813, "learning_rate": 3.0934209251572596e-06, "loss": 0.8292, "step": 271580 }, { "epoch": 4.768166575958145, "grad_norm": 0.05595834606681241, "learning_rate": 3.0917699053930197e-06, "loss": 0.8312, "step": 271590 }, { "epoch": 4.768342140838147, "grad_norm": 0.0517464564305327, "learning_rate": 3.0901201261448145e-06, "loss": 0.8254, "step": 271600 }, { "epoch": 4.768517705718148, "grad_norm": 0.048215310524418506, "learning_rate": 3.08847158743355e-06, "loss": 0.8309, "step": 271610 }, { "epoch": 4.768693270598149, "grad_norm": 0.04695962487215543, "learning_rate": 3.0868242892801305e-06, "loss": 0.838, "step": 271620 }, { "epoch": 4.768868835478151, "grad_norm": 0.04685090833291896, "learning_rate": 3.085178231705407e-06, "loss": 0.8405, "step": 271630 }, { "epoch": 4.769044400358152, "grad_norm": 0.046947519785874275, "learning_rate": 3.0835334147302297e-06, "loss": 0.8405, "step": 271640 }, { "epoch": 4.769219965238154, "grad_norm": 0.04993960599291738, "learning_rate": 3.0818898383754595e-06, "loss": 0.8387, "step": 271650 }, { "epoch": 4.769395530118155, "grad_norm": 0.05442684057221733, "learning_rate": 3.080247502661903e-06, "loss": 0.8379, "step": 271660 }, { "epoch": 4.769571094998157, "grad_norm": 0.05475273258714771, "learning_rate": 3.078606407610389e-06, "loss": 0.8352, "step": 271670 }, { "epoch": 4.769746659878158, "grad_norm": 0.05188274088985188, "learning_rate": 3.0769665532416903e-06, "loss": 0.8355, "step": 271680 }, { "epoch": 4.76992222475816, "grad_norm": 0.04801000464249479, "learning_rate": 3.0753279395765807e-06, "loss": 0.8414, "step": 271690 }, { "epoch": 4.770097789638161, "grad_norm": 0.05114090982654578, "learning_rate": 3.0736905666358547e-06, "loss": 0.8374, "step": 271700 }, { "epoch": 4.770273354518162, "grad_norm": 0.052966068858296154, "learning_rate": 3.0720544344402207e-06, "loss": 0.8423, "step": 271710 }, { "epoch": 4.770448919398164, "grad_norm": 0.052374509632988106, "learning_rate": 3.0704195430104296e-06, "loss": 0.8351, "step": 271720 }, { "epoch": 4.770624484278165, "grad_norm": 0.049224668807189585, "learning_rate": 3.0687858923672004e-06, "loss": 0.8471, "step": 271730 }, { "epoch": 4.770800049158167, "grad_norm": 0.04893832427260954, "learning_rate": 3.067153482531218e-06, "loss": 0.8364, "step": 271740 }, { "epoch": 4.770975614038168, "grad_norm": 0.05726319810669407, "learning_rate": 3.065522313523157e-06, "loss": 0.8362, "step": 271750 }, { "epoch": 4.771151178918169, "grad_norm": 0.04456854875832515, "learning_rate": 3.0638923853637136e-06, "loss": 0.8378, "step": 271760 }, { "epoch": 4.771326743798171, "grad_norm": 0.09064007270316263, "learning_rate": 3.062263698073519e-06, "loss": 0.8369, "step": 271770 }, { "epoch": 4.771502308678172, "grad_norm": 0.05019757281402396, "learning_rate": 3.060636251673203e-06, "loss": 0.8368, "step": 271780 }, { "epoch": 4.771677873558174, "grad_norm": 0.04640220454695489, "learning_rate": 3.0590100461834073e-06, "loss": 0.8437, "step": 271790 }, { "epoch": 4.771853438438175, "grad_norm": 0.055834404937313266, "learning_rate": 3.0573850816247187e-06, "loss": 0.8429, "step": 271800 }, { "epoch": 4.772029003318176, "grad_norm": 0.04865562738890265, "learning_rate": 3.055761358017734e-06, "loss": 0.8365, "step": 271810 }, { "epoch": 4.772204568198178, "grad_norm": 0.050287794727624124, "learning_rate": 3.0541388753830188e-06, "loss": 0.8373, "step": 271820 }, { "epoch": 4.772380133078179, "grad_norm": 0.04745252734045799, "learning_rate": 3.052517633741148e-06, "loss": 0.8308, "step": 271830 }, { "epoch": 4.772555697958181, "grad_norm": 0.052719454337417225, "learning_rate": 3.050897633112642e-06, "loss": 0.8454, "step": 271840 }, { "epoch": 4.772731262838182, "grad_norm": 0.058145197703818235, "learning_rate": 3.0492788735180327e-06, "loss": 0.8357, "step": 271850 }, { "epoch": 4.772906827718184, "grad_norm": 0.0496581722149249, "learning_rate": 3.0476613549778295e-06, "loss": 0.8378, "step": 271860 }, { "epoch": 4.773082392598185, "grad_norm": 0.0475475905884917, "learning_rate": 3.0460450775125423e-06, "loss": 0.8409, "step": 271870 }, { "epoch": 4.773257957478186, "grad_norm": 0.0526072384018392, "learning_rate": 3.044430041142626e-06, "loss": 0.8365, "step": 271880 }, { "epoch": 4.773433522358188, "grad_norm": 0.041351976475230846, "learning_rate": 3.0428162458885565e-06, "loss": 0.8387, "step": 271890 }, { "epoch": 4.773609087238189, "grad_norm": 0.05796112984743137, "learning_rate": 3.0412036917707777e-06, "loss": 0.8377, "step": 271900 }, { "epoch": 4.773784652118191, "grad_norm": 0.05967469914285259, "learning_rate": 3.0395923788097217e-06, "loss": 0.8424, "step": 271910 }, { "epoch": 4.773960216998192, "grad_norm": 0.06128836117993918, "learning_rate": 3.037982307025812e-06, "loss": 0.8479, "step": 271920 }, { "epoch": 4.7741357818781935, "grad_norm": 0.041968077835822556, "learning_rate": 3.036373476439424e-06, "loss": 0.8382, "step": 271930 }, { "epoch": 4.774311346758195, "grad_norm": 0.06613798957388019, "learning_rate": 3.034765887070981e-06, "loss": 0.8295, "step": 271940 }, { "epoch": 4.774486911638196, "grad_norm": 0.051132717444606804, "learning_rate": 3.033159538940805e-06, "loss": 0.8437, "step": 271950 }, { "epoch": 4.7746624765181975, "grad_norm": 0.051986385549468765, "learning_rate": 3.031554432069296e-06, "loss": 0.8432, "step": 271960 }, { "epoch": 4.774838041398199, "grad_norm": 0.05131312205958178, "learning_rate": 3.029950566476754e-06, "loss": 0.8415, "step": 271970 }, { "epoch": 4.7750136062782005, "grad_norm": 0.04692997165847548, "learning_rate": 3.0283479421835253e-06, "loss": 0.8399, "step": 271980 }, { "epoch": 4.7751891711582015, "grad_norm": 0.04753910924717359, "learning_rate": 3.026746559209887e-06, "loss": 0.8352, "step": 271990 }, { "epoch": 4.7753647360382026, "grad_norm": 0.052590182153495056, "learning_rate": 3.0251464175761516e-06, "loss": 0.8387, "step": 272000 }, { "epoch": 4.7755403009182045, "grad_norm": 0.05079497141634782, "learning_rate": 3.023547517302598e-06, "loss": 0.8329, "step": 272010 }, { "epoch": 4.7757158657982055, "grad_norm": 0.04471549530958251, "learning_rate": 3.021949858409472e-06, "loss": 0.8377, "step": 272020 }, { "epoch": 4.775891430678207, "grad_norm": 0.07403224223379759, "learning_rate": 3.020353440917008e-06, "loss": 0.8392, "step": 272030 }, { "epoch": 4.7760669955582085, "grad_norm": 0.04523049307403165, "learning_rate": 3.0187582648454634e-06, "loss": 0.8457, "step": 272040 }, { "epoch": 4.7762425604382095, "grad_norm": 0.046249311421430225, "learning_rate": 3.0171643302150063e-06, "loss": 0.8421, "step": 272050 }, { "epoch": 4.776418125318211, "grad_norm": 0.05177348603866279, "learning_rate": 3.015571637045872e-06, "loss": 0.8315, "step": 272060 }, { "epoch": 4.7765936901982124, "grad_norm": 0.05429786167154706, "learning_rate": 3.0139801853582186e-06, "loss": 0.84, "step": 272070 }, { "epoch": 4.776769255078214, "grad_norm": 0.0472539858562014, "learning_rate": 3.012389975172215e-06, "loss": 0.8285, "step": 272080 }, { "epoch": 4.776944819958215, "grad_norm": 0.06726479266482321, "learning_rate": 3.0108010065080073e-06, "loss": 0.8401, "step": 272090 }, { "epoch": 4.777120384838217, "grad_norm": 0.045408363016968385, "learning_rate": 3.0092132793857315e-06, "loss": 0.8341, "step": 272100 }, { "epoch": 4.777295949718218, "grad_norm": 0.053299034049128405, "learning_rate": 3.0076267938255025e-06, "loss": 0.8341, "step": 272110 }, { "epoch": 4.777471514598219, "grad_norm": 0.057201910320460354, "learning_rate": 3.006041549847422e-06, "loss": 0.8308, "step": 272120 }, { "epoch": 4.777647079478221, "grad_norm": 0.06131261662966852, "learning_rate": 3.004457547471583e-06, "loss": 0.8405, "step": 272130 }, { "epoch": 4.777822644358222, "grad_norm": 0.06165089014902228, "learning_rate": 3.002874786718044e-06, "loss": 0.8403, "step": 272140 }, { "epoch": 4.777998209238224, "grad_norm": 0.053992010245312215, "learning_rate": 3.0012932676068527e-06, "loss": 0.8507, "step": 272150 }, { "epoch": 4.778173774118225, "grad_norm": 0.05397051905317473, "learning_rate": 2.9997129901580684e-06, "loss": 0.8432, "step": 272160 }, { "epoch": 4.778349338998227, "grad_norm": 0.049771610929222714, "learning_rate": 2.998133954391695e-06, "loss": 0.8388, "step": 272170 }, { "epoch": 4.778524903878228, "grad_norm": 0.04725557022636839, "learning_rate": 2.9965561603277586e-06, "loss": 0.8293, "step": 272180 }, { "epoch": 4.778700468758229, "grad_norm": 0.07401234985355087, "learning_rate": 2.99497960798623e-06, "loss": 0.8392, "step": 272190 }, { "epoch": 4.778876033638231, "grad_norm": 0.05036470722457178, "learning_rate": 2.9934042973870803e-06, "loss": 0.8417, "step": 272200 }, { "epoch": 4.779051598518232, "grad_norm": 0.051520491426241904, "learning_rate": 2.9918302285503028e-06, "loss": 0.8364, "step": 272210 }, { "epoch": 4.779227163398234, "grad_norm": 0.0763385776409845, "learning_rate": 2.9902574014958018e-06, "loss": 0.8468, "step": 272220 }, { "epoch": 4.779402728278235, "grad_norm": 0.05415339441461384, "learning_rate": 2.988685816243527e-06, "loss": 0.836, "step": 272230 }, { "epoch": 4.779578293158236, "grad_norm": 0.04355447847057992, "learning_rate": 2.987115472813394e-06, "loss": 0.8433, "step": 272240 }, { "epoch": 4.779753858038238, "grad_norm": 0.07974319836812874, "learning_rate": 2.985546371225287e-06, "loss": 0.8392, "step": 272250 }, { "epoch": 4.779929422918239, "grad_norm": 0.05745297876893198, "learning_rate": 2.9839785114990987e-06, "loss": 0.8396, "step": 272260 }, { "epoch": 4.780104987798241, "grad_norm": 0.06123438202070261, "learning_rate": 2.98241189365468e-06, "loss": 0.84, "step": 272270 }, { "epoch": 4.780280552678242, "grad_norm": 0.05252246026806511, "learning_rate": 2.9808465177118918e-06, "loss": 0.8373, "step": 272280 }, { "epoch": 4.780456117558243, "grad_norm": 0.0584847499596752, "learning_rate": 2.9792823836905514e-06, "loss": 0.8356, "step": 272290 }, { "epoch": 4.780631682438245, "grad_norm": 0.04092336029168831, "learning_rate": 2.9777194916104977e-06, "loss": 0.8463, "step": 272300 }, { "epoch": 4.780807247318246, "grad_norm": 0.06668370317051449, "learning_rate": 2.9761578414915373e-06, "loss": 0.8441, "step": 272310 }, { "epoch": 4.780982812198248, "grad_norm": 0.05887218172963556, "learning_rate": 2.9745974333534204e-06, "loss": 0.844, "step": 272320 }, { "epoch": 4.781158377078249, "grad_norm": 0.04963314592918964, "learning_rate": 2.9730382672159546e-06, "loss": 0.8231, "step": 272330 }, { "epoch": 4.781333941958251, "grad_norm": 0.05513565984105903, "learning_rate": 2.9714803430988676e-06, "loss": 0.8371, "step": 272340 }, { "epoch": 4.781509506838252, "grad_norm": 0.052073189648283925, "learning_rate": 2.969923661021934e-06, "loss": 0.8345, "step": 272350 }, { "epoch": 4.781685071718254, "grad_norm": 0.04956061466172039, "learning_rate": 2.968368221004838e-06, "loss": 0.8342, "step": 272360 }, { "epoch": 4.781860636598255, "grad_norm": 0.07003331534839052, "learning_rate": 2.966814023067309e-06, "loss": 0.8317, "step": 272370 }, { "epoch": 4.782036201478256, "grad_norm": 0.0566851936567614, "learning_rate": 2.9652610672290323e-06, "loss": 0.8428, "step": 272380 }, { "epoch": 4.782211766358258, "grad_norm": 0.04462201459975026, "learning_rate": 2.963709353509683e-06, "loss": 0.8326, "step": 272390 }, { "epoch": 4.782387331238259, "grad_norm": 0.053242754235159014, "learning_rate": 2.9621588819289343e-06, "loss": 0.8333, "step": 272400 }, { "epoch": 4.782562896118261, "grad_norm": 0.05542314436797711, "learning_rate": 2.9606096525064062e-06, "loss": 0.8267, "step": 272410 }, { "epoch": 4.782738460998262, "grad_norm": 0.05365925173725804, "learning_rate": 2.9590616652617615e-06, "loss": 0.8395, "step": 272420 }, { "epoch": 4.782914025878263, "grad_norm": 0.045062111203402845, "learning_rate": 2.9575149202145865e-06, "loss": 0.8357, "step": 272430 }, { "epoch": 4.783089590758265, "grad_norm": 0.04901015648438551, "learning_rate": 2.955969417384479e-06, "loss": 0.8411, "step": 272440 }, { "epoch": 4.783265155638266, "grad_norm": 0.05290357746798014, "learning_rate": 2.9544251567910464e-06, "loss": 0.8424, "step": 272450 }, { "epoch": 4.783440720518268, "grad_norm": 0.05779306273445232, "learning_rate": 2.952882138453821e-06, "loss": 0.8321, "step": 272460 }, { "epoch": 4.783616285398269, "grad_norm": 0.04210137667575115, "learning_rate": 2.9513403623923784e-06, "loss": 0.8466, "step": 272470 }, { "epoch": 4.78379185027827, "grad_norm": 0.04554060099178381, "learning_rate": 2.949799828626249e-06, "loss": 0.8415, "step": 272480 }, { "epoch": 4.783967415158272, "grad_norm": 0.06775259358198148, "learning_rate": 2.9482605371749425e-06, "loss": 0.8409, "step": 272490 }, { "epoch": 4.784142980038273, "grad_norm": 0.04898149312976878, "learning_rate": 2.9467224880579577e-06, "loss": 0.8415, "step": 272500 }, { "epoch": 4.784318544918275, "grad_norm": 0.07156119321163482, "learning_rate": 2.9451856812947923e-06, "loss": 0.8327, "step": 272510 }, { "epoch": 4.784494109798276, "grad_norm": 0.043847431509277106, "learning_rate": 2.9436501169049346e-06, "loss": 0.8326, "step": 272520 }, { "epoch": 4.784669674678277, "grad_norm": 0.05718004176203886, "learning_rate": 2.9421157949078055e-06, "loss": 0.8328, "step": 272530 }, { "epoch": 4.784845239558279, "grad_norm": 0.049987945184977775, "learning_rate": 2.9405827153228592e-06, "loss": 0.8364, "step": 272540 }, { "epoch": 4.78502080443828, "grad_norm": 0.04540508540671505, "learning_rate": 2.93905087816954e-06, "loss": 0.8363, "step": 272550 }, { "epoch": 4.785196369318282, "grad_norm": 0.050466865093385334, "learning_rate": 2.9375202834672245e-06, "loss": 0.8341, "step": 272560 }, { "epoch": 4.785371934198283, "grad_norm": 0.051836461132813504, "learning_rate": 2.9359909312353233e-06, "loss": 0.8404, "step": 272570 }, { "epoch": 4.785547499078285, "grad_norm": 0.05065113237817642, "learning_rate": 2.9344628214932147e-06, "loss": 0.8342, "step": 272580 }, { "epoch": 4.785723063958286, "grad_norm": 0.05849737797384587, "learning_rate": 2.9329359542602535e-06, "loss": 0.8395, "step": 272590 }, { "epoch": 4.785898628838288, "grad_norm": 0.061141963950995665, "learning_rate": 2.9314103295557846e-06, "loss": 0.8441, "step": 272600 }, { "epoch": 4.786074193718289, "grad_norm": 0.06200351088845982, "learning_rate": 2.9298859473991516e-06, "loss": 0.8324, "step": 272610 }, { "epoch": 4.78624975859829, "grad_norm": 0.1171198222851616, "learning_rate": 2.9283628078096564e-06, "loss": 0.8392, "step": 272620 }, { "epoch": 4.786425323478292, "grad_norm": 0.06627124053538233, "learning_rate": 2.926840910806588e-06, "loss": 0.8248, "step": 272630 }, { "epoch": 4.786600888358293, "grad_norm": 0.04256122894686353, "learning_rate": 2.9253202564092575e-06, "loss": 0.8391, "step": 272640 }, { "epoch": 4.786776453238295, "grad_norm": 0.05852519608021237, "learning_rate": 2.923800844636911e-06, "loss": 0.8399, "step": 272650 }, { "epoch": 4.786952018118296, "grad_norm": 0.05322086756714874, "learning_rate": 2.9222826755087944e-06, "loss": 0.8361, "step": 272660 }, { "epoch": 4.787127582998297, "grad_norm": 0.06075103182594135, "learning_rate": 2.9207657490441527e-06, "loss": 0.8475, "step": 272670 }, { "epoch": 4.787303147878299, "grad_norm": 0.060439836141617534, "learning_rate": 2.9192500652622094e-06, "loss": 0.8451, "step": 272680 }, { "epoch": 4.7874787127583, "grad_norm": 0.049366551753258235, "learning_rate": 2.9177356241821782e-06, "loss": 0.8374, "step": 272690 }, { "epoch": 4.7876542776383015, "grad_norm": 0.06717103203827511, "learning_rate": 2.9162224258232153e-06, "loss": 0.8367, "step": 272700 }, { "epoch": 4.787829842518303, "grad_norm": 0.07974625305893539, "learning_rate": 2.9147104702045236e-06, "loss": 0.8357, "step": 272710 }, { "epoch": 4.788005407398304, "grad_norm": 0.05187647392746153, "learning_rate": 2.9131997573452487e-06, "loss": 0.8373, "step": 272720 }, { "epoch": 4.7881809722783055, "grad_norm": 0.05031327692220071, "learning_rate": 2.9116902872645276e-06, "loss": 0.8353, "step": 272730 }, { "epoch": 4.788356537158307, "grad_norm": 0.05486571348650635, "learning_rate": 2.9101820599814837e-06, "loss": 0.8359, "step": 272740 }, { "epoch": 4.7885321020383085, "grad_norm": 0.051190094493191375, "learning_rate": 2.9086750755152316e-06, "loss": 0.8379, "step": 272750 }, { "epoch": 4.7887076669183095, "grad_norm": 0.05264359497721495, "learning_rate": 2.9071693338848736e-06, "loss": 0.842, "step": 272760 }, { "epoch": 4.788883231798311, "grad_norm": 0.06463692518768416, "learning_rate": 2.90566483510948e-06, "loss": 0.8287, "step": 272770 }, { "epoch": 4.7890587966783125, "grad_norm": 0.05270197616874153, "learning_rate": 2.904161579208108e-06, "loss": 0.8406, "step": 272780 }, { "epoch": 4.7892343615583135, "grad_norm": 0.0517538054160639, "learning_rate": 2.9026595661998073e-06, "loss": 0.8347, "step": 272790 }, { "epoch": 4.789409926438315, "grad_norm": 0.05271477471074964, "learning_rate": 2.9011587961036136e-06, "loss": 0.8311, "step": 272800 }, { "epoch": 4.7895854913183165, "grad_norm": 0.044253022947663305, "learning_rate": 2.8996592689385307e-06, "loss": 0.8375, "step": 272810 }, { "epoch": 4.789761056198318, "grad_norm": 0.04801087163220455, "learning_rate": 2.898160984723585e-06, "loss": 0.8316, "step": 272820 }, { "epoch": 4.789936621078319, "grad_norm": 0.07191219902452366, "learning_rate": 2.8966639434777252e-06, "loss": 0.8372, "step": 272830 }, { "epoch": 4.790112185958321, "grad_norm": 0.049254055095491356, "learning_rate": 2.8951681452199323e-06, "loss": 0.8352, "step": 272840 }, { "epoch": 4.790287750838322, "grad_norm": 0.05624239069998303, "learning_rate": 2.893673589969167e-06, "loss": 0.8339, "step": 272850 }, { "epoch": 4.790463315718323, "grad_norm": 0.07001655948731995, "learning_rate": 2.892180277744367e-06, "loss": 0.8452, "step": 272860 }, { "epoch": 4.790638880598325, "grad_norm": 0.05335236854064375, "learning_rate": 2.890688208564426e-06, "loss": 0.8447, "step": 272870 }, { "epoch": 4.790814445478326, "grad_norm": 0.05924205618755279, "learning_rate": 2.8891973824482814e-06, "loss": 0.8403, "step": 272880 }, { "epoch": 4.790990010358328, "grad_norm": 0.049782566080600096, "learning_rate": 2.887707799414806e-06, "loss": 0.8343, "step": 272890 }, { "epoch": 4.791165575238329, "grad_norm": 0.05061983145407054, "learning_rate": 2.886219459482882e-06, "loss": 0.8424, "step": 272900 }, { "epoch": 4.79134114011833, "grad_norm": 0.05001516970047681, "learning_rate": 2.884732362671349e-06, "loss": 0.8413, "step": 272910 }, { "epoch": 4.791516704998332, "grad_norm": 0.05902628642560134, "learning_rate": 2.8832465089990672e-06, "loss": 0.8342, "step": 272920 }, { "epoch": 4.791692269878333, "grad_norm": 0.05972825235462269, "learning_rate": 2.8817618984848545e-06, "loss": 0.8318, "step": 272930 }, { "epoch": 4.791867834758335, "grad_norm": 0.05783646018683862, "learning_rate": 2.880278531147527e-06, "loss": 0.8418, "step": 272940 }, { "epoch": 4.792043399638336, "grad_norm": 0.07222545560472893, "learning_rate": 2.8787964070058683e-06, "loss": 0.8352, "step": 272950 }, { "epoch": 4.792218964518337, "grad_norm": 0.04938546120214106, "learning_rate": 2.8773155260786747e-06, "loss": 0.8352, "step": 272960 }, { "epoch": 4.792394529398339, "grad_norm": 0.047309763079297476, "learning_rate": 2.875835888384674e-06, "loss": 0.8287, "step": 272970 }, { "epoch": 4.79257009427834, "grad_norm": 0.044903013533756836, "learning_rate": 2.874357493942662e-06, "loss": 0.8305, "step": 272980 }, { "epoch": 4.792745659158342, "grad_norm": 0.05223074968104964, "learning_rate": 2.872880342771345e-06, "loss": 0.8391, "step": 272990 }, { "epoch": 4.792921224038343, "grad_norm": 0.07007765930840969, "learning_rate": 2.8714044348894307e-06, "loss": 0.8385, "step": 273000 }, { "epoch": 4.793096788918345, "grad_norm": 0.06715124321390767, "learning_rate": 2.869929770315637e-06, "loss": 0.8346, "step": 273010 }, { "epoch": 4.793272353798346, "grad_norm": 0.04679157263483986, "learning_rate": 2.8684563490686267e-06, "loss": 0.8346, "step": 273020 }, { "epoch": 4.793447918678348, "grad_norm": 0.054952421320724934, "learning_rate": 2.866984171167096e-06, "loss": 0.8418, "step": 273030 }, { "epoch": 4.793623483558349, "grad_norm": 0.04355778966459859, "learning_rate": 2.8655132366296746e-06, "loss": 0.8399, "step": 273040 }, { "epoch": 4.79379904843835, "grad_norm": 0.06669245547686664, "learning_rate": 2.8640435454750045e-06, "loss": 0.8348, "step": 273050 }, { "epoch": 4.793974613318352, "grad_norm": 0.060256361709516745, "learning_rate": 2.862575097721726e-06, "loss": 0.8416, "step": 273060 }, { "epoch": 4.794150178198353, "grad_norm": 0.05779187902935643, "learning_rate": 2.8611078933884143e-06, "loss": 0.8403, "step": 273070 }, { "epoch": 4.794325743078355, "grad_norm": 0.05551534473036465, "learning_rate": 2.859641932493678e-06, "loss": 0.8343, "step": 273080 }, { "epoch": 4.794501307958356, "grad_norm": 0.05677549013349524, "learning_rate": 2.858177215056081e-06, "loss": 0.8387, "step": 273090 }, { "epoch": 4.794676872838357, "grad_norm": 0.05283057358707089, "learning_rate": 2.8567137410941984e-06, "loss": 0.8264, "step": 273100 }, { "epoch": 4.794852437718359, "grad_norm": 0.052024897547416744, "learning_rate": 2.855251510626561e-06, "loss": 0.8432, "step": 273110 }, { "epoch": 4.79502800259836, "grad_norm": 0.04724292497892216, "learning_rate": 2.8537905236716896e-06, "loss": 0.8397, "step": 273120 }, { "epoch": 4.795203567478362, "grad_norm": 0.053889806385812776, "learning_rate": 2.852330780248104e-06, "loss": 0.8396, "step": 273130 }, { "epoch": 4.795379132358363, "grad_norm": 0.06108352254654817, "learning_rate": 2.850872280374292e-06, "loss": 0.8366, "step": 273140 }, { "epoch": 4.795554697238364, "grad_norm": 0.06627764960451696, "learning_rate": 2.8494150240687508e-06, "loss": 0.84, "step": 273150 }, { "epoch": 4.795730262118366, "grad_norm": 0.04443483520565985, "learning_rate": 2.8479590113499247e-06, "loss": 0.8394, "step": 273160 }, { "epoch": 4.795905826998367, "grad_norm": 0.05072424258055007, "learning_rate": 2.846504242236267e-06, "loss": 0.8423, "step": 273170 }, { "epoch": 4.796081391878369, "grad_norm": 0.05704110196943755, "learning_rate": 2.8450507167462106e-06, "loss": 0.8305, "step": 273180 }, { "epoch": 4.79625695675837, "grad_norm": 0.05943635413399807, "learning_rate": 2.8435984348981766e-06, "loss": 0.8461, "step": 273190 }, { "epoch": 4.796432521638371, "grad_norm": 0.0564606158356772, "learning_rate": 2.8421473967105643e-06, "loss": 0.8348, "step": 273200 }, { "epoch": 4.796608086518373, "grad_norm": 0.046856436700104344, "learning_rate": 2.8406976022017507e-06, "loss": 0.8423, "step": 273210 }, { "epoch": 4.796783651398374, "grad_norm": 0.05476818821094388, "learning_rate": 2.8392490513901137e-06, "loss": 0.8376, "step": 273220 }, { "epoch": 4.796959216278376, "grad_norm": 0.05715358407742426, "learning_rate": 2.837801744294008e-06, "loss": 0.8324, "step": 273230 }, { "epoch": 4.797134781158377, "grad_norm": 0.0467898203480553, "learning_rate": 2.8363556809317564e-06, "loss": 0.8395, "step": 273240 }, { "epoch": 4.797310346038379, "grad_norm": 0.07099244577662506, "learning_rate": 2.8349108613217028e-06, "loss": 0.8407, "step": 273250 }, { "epoch": 4.79748591091838, "grad_norm": 0.04450691197176799, "learning_rate": 2.8334672854821264e-06, "loss": 0.8412, "step": 273260 }, { "epoch": 4.797661475798382, "grad_norm": 0.04596383473520709, "learning_rate": 2.832024953431348e-06, "loss": 0.8383, "step": 273270 }, { "epoch": 4.797837040678383, "grad_norm": 0.053478884993333774, "learning_rate": 2.8305838651876257e-06, "loss": 0.8442, "step": 273280 }, { "epoch": 4.798012605558384, "grad_norm": 0.0502824777620097, "learning_rate": 2.8291440207692145e-06, "loss": 0.8382, "step": 273290 }, { "epoch": 4.798188170438386, "grad_norm": 0.05523903914424387, "learning_rate": 2.8277054201943613e-06, "loss": 0.8372, "step": 273300 }, { "epoch": 4.798363735318387, "grad_norm": 0.0459653547727196, "learning_rate": 2.8262680634812988e-06, "loss": 0.8345, "step": 273310 }, { "epoch": 4.798539300198389, "grad_norm": 0.045190771556223266, "learning_rate": 2.8248319506482408e-06, "loss": 0.8381, "step": 273320 }, { "epoch": 4.79871486507839, "grad_norm": 0.04103432343079966, "learning_rate": 2.8233970817133663e-06, "loss": 0.8405, "step": 273330 }, { "epoch": 4.798890429958391, "grad_norm": 0.048575224040649144, "learning_rate": 2.821963456694876e-06, "loss": 0.846, "step": 273340 }, { "epoch": 4.799065994838393, "grad_norm": 0.045335192671927135, "learning_rate": 2.820531075610918e-06, "loss": 0.8433, "step": 273350 }, { "epoch": 4.799241559718394, "grad_norm": 0.05522109388490602, "learning_rate": 2.8190999384796373e-06, "loss": 0.8383, "step": 273360 }, { "epoch": 4.799417124598396, "grad_norm": 0.046278290612909986, "learning_rate": 2.8176700453191923e-06, "loss": 0.8353, "step": 273370 }, { "epoch": 4.799592689478397, "grad_norm": 0.06304691455283677, "learning_rate": 2.816241396147685e-06, "loss": 0.8314, "step": 273380 }, { "epoch": 4.799768254358398, "grad_norm": 0.04340434022187186, "learning_rate": 2.814813990983207e-06, "loss": 0.8338, "step": 273390 }, { "epoch": 4.7999438192384, "grad_norm": 0.05750832249920061, "learning_rate": 2.813387829843861e-06, "loss": 0.8387, "step": 273400 }, { "epoch": 4.800119384118401, "grad_norm": 0.04734387336845292, "learning_rate": 2.8119629127477053e-06, "loss": 0.8386, "step": 273410 }, { "epoch": 4.800294948998403, "grad_norm": 0.053375768160957314, "learning_rate": 2.810539239712788e-06, "loss": 0.8441, "step": 273420 }, { "epoch": 4.800470513878404, "grad_norm": 0.0501417982139343, "learning_rate": 2.8091168107571667e-06, "loss": 0.8409, "step": 273430 }, { "epoch": 4.8006460787584055, "grad_norm": 0.06007955756468875, "learning_rate": 2.8076956258988573e-06, "loss": 0.8414, "step": 273440 }, { "epoch": 4.800821643638407, "grad_norm": 0.05219339548589624, "learning_rate": 2.8062756851558514e-06, "loss": 0.834, "step": 273450 }, { "epoch": 4.800997208518408, "grad_norm": 0.055381367315034166, "learning_rate": 2.8048569885461527e-06, "loss": 0.8424, "step": 273460 }, { "epoch": 4.8011727733984095, "grad_norm": 0.05499448717127039, "learning_rate": 2.803439536087743e-06, "loss": 0.8364, "step": 273470 }, { "epoch": 4.801348338278411, "grad_norm": 0.043156935884811884, "learning_rate": 2.8020233277985713e-06, "loss": 0.8391, "step": 273480 }, { "epoch": 4.8015239031584125, "grad_norm": 0.0627095948238732, "learning_rate": 2.800608363696585e-06, "loss": 0.8436, "step": 273490 }, { "epoch": 4.8016994680384135, "grad_norm": 0.05600234448731083, "learning_rate": 2.799194643799712e-06, "loss": 0.8397, "step": 273500 }, { "epoch": 4.801875032918415, "grad_norm": 0.05669472623657756, "learning_rate": 2.797782168125857e-06, "loss": 0.8417, "step": 273510 }, { "epoch": 4.8020505977984165, "grad_norm": 0.05494440984824445, "learning_rate": 2.7963709366929242e-06, "loss": 0.8419, "step": 273520 }, { "epoch": 4.8022261626784175, "grad_norm": 0.05350391029861425, "learning_rate": 2.7949609495187857e-06, "loss": 0.8299, "step": 273530 }, { "epoch": 4.802401727558419, "grad_norm": 0.05701916678257653, "learning_rate": 2.7935522066213235e-06, "loss": 0.8332, "step": 273540 }, { "epoch": 4.8025772924384205, "grad_norm": 0.05270732642708108, "learning_rate": 2.7921447080183657e-06, "loss": 0.8406, "step": 273550 }, { "epoch": 4.802752857318422, "grad_norm": 0.07089415587909113, "learning_rate": 2.790738453727762e-06, "loss": 0.8454, "step": 273560 }, { "epoch": 4.802928422198423, "grad_norm": 0.046545926065815826, "learning_rate": 2.7893334437673293e-06, "loss": 0.8377, "step": 273570 }, { "epoch": 4.8031039870784245, "grad_norm": 0.05158493306681433, "learning_rate": 2.7879296781548625e-06, "loss": 0.836, "step": 273580 }, { "epoch": 4.803279551958426, "grad_norm": 0.04936613695602706, "learning_rate": 2.7865271569081334e-06, "loss": 0.8404, "step": 273590 }, { "epoch": 4.803455116838427, "grad_norm": 0.055032483199221854, "learning_rate": 2.7851258800449386e-06, "loss": 0.8362, "step": 273600 }, { "epoch": 4.803630681718429, "grad_norm": 0.08719845235547871, "learning_rate": 2.7837258475830267e-06, "loss": 0.8475, "step": 273610 }, { "epoch": 4.80380624659843, "grad_norm": 0.051376394459901005, "learning_rate": 2.7823270595401284e-06, "loss": 0.8386, "step": 273620 }, { "epoch": 4.803981811478431, "grad_norm": 0.04802552553335705, "learning_rate": 2.780929515933972e-06, "loss": 0.8387, "step": 273630 }, { "epoch": 4.804157376358433, "grad_norm": 0.055168456047945916, "learning_rate": 2.7795332167822523e-06, "loss": 0.8409, "step": 273640 }, { "epoch": 4.804332941238434, "grad_norm": 0.07140633419542976, "learning_rate": 2.778138162102677e-06, "loss": 0.8383, "step": 273650 }, { "epoch": 4.804508506118436, "grad_norm": 0.05048735364421116, "learning_rate": 2.7767443519129208e-06, "loss": 0.8417, "step": 273660 }, { "epoch": 4.804684070998437, "grad_norm": 0.053057210301874005, "learning_rate": 2.775351786230634e-06, "loss": 0.8367, "step": 273670 }, { "epoch": 4.804859635878439, "grad_norm": 0.051384205576900176, "learning_rate": 2.773960465073458e-06, "loss": 0.8332, "step": 273680 }, { "epoch": 4.80503520075844, "grad_norm": 0.057730887155330333, "learning_rate": 2.772570388459034e-06, "loss": 0.8468, "step": 273690 }, { "epoch": 4.805210765638442, "grad_norm": 0.04979515040868421, "learning_rate": 2.771181556404959e-06, "loss": 0.8376, "step": 273700 }, { "epoch": 4.805386330518443, "grad_norm": 0.05505416143428089, "learning_rate": 2.7697939689288515e-06, "loss": 0.838, "step": 273710 }, { "epoch": 4.805561895398444, "grad_norm": 0.05296292114786627, "learning_rate": 2.7684076260482758e-06, "loss": 0.8312, "step": 273720 }, { "epoch": 4.805737460278446, "grad_norm": 0.07356385281199877, "learning_rate": 2.7670225277808064e-06, "loss": 0.8338, "step": 273730 }, { "epoch": 4.805913025158447, "grad_norm": 0.049450834222695764, "learning_rate": 2.7656386741439856e-06, "loss": 0.8347, "step": 273740 }, { "epoch": 4.806088590038449, "grad_norm": 0.04835167639125916, "learning_rate": 2.7642560651553443e-06, "loss": 0.8387, "step": 273750 }, { "epoch": 4.80626415491845, "grad_norm": 0.05388142273795797, "learning_rate": 2.762874700832403e-06, "loss": 0.8482, "step": 273760 }, { "epoch": 4.806439719798451, "grad_norm": 0.0550001498786457, "learning_rate": 2.7614945811926694e-06, "loss": 0.8276, "step": 273770 }, { "epoch": 4.806615284678453, "grad_norm": 0.04871263777974383, "learning_rate": 2.760115706253632e-06, "loss": 0.8323, "step": 273780 }, { "epoch": 4.806790849558454, "grad_norm": 0.05140904201542726, "learning_rate": 2.758738076032755e-06, "loss": 0.8326, "step": 273790 }, { "epoch": 4.806966414438456, "grad_norm": 0.054478179861003624, "learning_rate": 2.7573616905474922e-06, "loss": 0.8419, "step": 273800 }, { "epoch": 4.807141979318457, "grad_norm": 0.05531657125836398, "learning_rate": 2.755986549815287e-06, "loss": 0.8243, "step": 273810 }, { "epoch": 4.807317544198458, "grad_norm": 0.041214739299007645, "learning_rate": 2.7546126538535503e-06, "loss": 0.8388, "step": 273820 }, { "epoch": 4.80749310907846, "grad_norm": 0.0490472170678095, "learning_rate": 2.753240002679713e-06, "loss": 0.8375, "step": 273830 }, { "epoch": 4.807668673958461, "grad_norm": 0.043477221850859823, "learning_rate": 2.7518685963111523e-06, "loss": 0.8449, "step": 273840 }, { "epoch": 4.807844238838463, "grad_norm": 0.054395727477970925, "learning_rate": 2.7504984347652343e-06, "loss": 0.8414, "step": 273850 }, { "epoch": 4.808019803718464, "grad_norm": 0.05885581000972204, "learning_rate": 2.749129518059337e-06, "loss": 0.8373, "step": 273860 }, { "epoch": 4.808195368598465, "grad_norm": 0.045735304436029606, "learning_rate": 2.7477618462108037e-06, "loss": 0.8425, "step": 273870 }, { "epoch": 4.808370933478467, "grad_norm": 0.05941723123880172, "learning_rate": 2.7463954192369563e-06, "loss": 0.8427, "step": 273880 }, { "epoch": 4.808546498358468, "grad_norm": 0.05490845292081517, "learning_rate": 2.7450302371551073e-06, "loss": 0.8355, "step": 273890 }, { "epoch": 4.80872206323847, "grad_norm": 0.04579878491329432, "learning_rate": 2.743666299982555e-06, "loss": 0.8385, "step": 273900 }, { "epoch": 4.808897628118471, "grad_norm": 0.0472595791606158, "learning_rate": 2.7423036077365795e-06, "loss": 0.8437, "step": 273910 }, { "epoch": 4.809073192998473, "grad_norm": 0.054958833484623035, "learning_rate": 2.740942160434457e-06, "loss": 0.8388, "step": 273920 }, { "epoch": 4.809248757878474, "grad_norm": 0.04287784919351478, "learning_rate": 2.7395819580934125e-06, "loss": 0.8351, "step": 273930 }, { "epoch": 4.809424322758476, "grad_norm": 0.07154161510818999, "learning_rate": 2.738223000730712e-06, "loss": 0.8415, "step": 273940 }, { "epoch": 4.809599887638477, "grad_norm": 0.048537101118523496, "learning_rate": 2.736865288363546e-06, "loss": 0.8375, "step": 273950 }, { "epoch": 4.809775452518478, "grad_norm": 0.06304159633431661, "learning_rate": 2.7355088210091382e-06, "loss": 0.8379, "step": 273960 }, { "epoch": 4.80995101739848, "grad_norm": 0.04757719785257009, "learning_rate": 2.7341535986846563e-06, "loss": 0.841, "step": 273970 }, { "epoch": 4.810126582278481, "grad_norm": 0.05250908224084286, "learning_rate": 2.7327996214072913e-06, "loss": 0.8294, "step": 273980 }, { "epoch": 4.810302147158483, "grad_norm": 0.05744159728192114, "learning_rate": 2.7314468891941886e-06, "loss": 0.8353, "step": 273990 }, { "epoch": 4.810477712038484, "grad_norm": 0.049875583689256785, "learning_rate": 2.730095402062473e-06, "loss": 0.8314, "step": 274000 }, { "epoch": 4.810653276918485, "grad_norm": 0.04432889845353877, "learning_rate": 2.7287451600293016e-06, "loss": 0.8426, "step": 274010 }, { "epoch": 4.810828841798487, "grad_norm": 0.04807263168156763, "learning_rate": 2.7273961631117436e-06, "loss": 0.8302, "step": 274020 }, { "epoch": 4.811004406678488, "grad_norm": 0.052133741825497446, "learning_rate": 2.726048411326913e-06, "loss": 0.8373, "step": 274030 }, { "epoch": 4.81117997155849, "grad_norm": 0.047510591391694154, "learning_rate": 2.7247019046918896e-06, "loss": 0.8291, "step": 274040 }, { "epoch": 4.811355536438491, "grad_norm": 0.0476299502665492, "learning_rate": 2.723356643223732e-06, "loss": 0.8408, "step": 274050 }, { "epoch": 4.811531101318492, "grad_norm": 0.06816853263031739, "learning_rate": 2.7220126269394754e-06, "loss": 0.8386, "step": 274060 }, { "epoch": 4.811706666198494, "grad_norm": 0.062460173064042133, "learning_rate": 2.720669855856147e-06, "loss": 0.8335, "step": 274070 }, { "epoch": 4.811882231078495, "grad_norm": 0.05588080699337944, "learning_rate": 2.719328329990782e-06, "loss": 0.8357, "step": 274080 }, { "epoch": 4.812057795958497, "grad_norm": 0.05027591745926784, "learning_rate": 2.7179880493603513e-06, "loss": 0.8394, "step": 274090 }, { "epoch": 4.812233360838498, "grad_norm": 0.05294402029794779, "learning_rate": 2.7166490139818473e-06, "loss": 0.8466, "step": 274100 }, { "epoch": 4.8124089257185, "grad_norm": 0.05857836935313177, "learning_rate": 2.7153112238722294e-06, "loss": 0.8349, "step": 274110 }, { "epoch": 4.812584490598501, "grad_norm": 0.052159958928901254, "learning_rate": 2.7139746790484676e-06, "loss": 0.8361, "step": 274120 }, { "epoch": 4.812760055478502, "grad_norm": 0.04611464364343843, "learning_rate": 2.712639379527478e-06, "loss": 0.8368, "step": 274130 }, { "epoch": 4.812935620358504, "grad_norm": 0.0505817095671278, "learning_rate": 2.7113053253261864e-06, "loss": 0.8349, "step": 274140 }, { "epoch": 4.813111185238505, "grad_norm": 0.047330964811583956, "learning_rate": 2.709972516461498e-06, "loss": 0.8411, "step": 274150 }, { "epoch": 4.813286750118507, "grad_norm": 0.07991379307677449, "learning_rate": 2.708640952950284e-06, "loss": 0.8422, "step": 274160 }, { "epoch": 4.813462314998508, "grad_norm": 0.046234165487679425, "learning_rate": 2.707310634809426e-06, "loss": 0.8371, "step": 274170 }, { "epoch": 4.8136378798785096, "grad_norm": 0.054470840165176816, "learning_rate": 2.7059815620557863e-06, "loss": 0.8347, "step": 274180 }, { "epoch": 4.813813444758511, "grad_norm": 0.06331153572832125, "learning_rate": 2.704653734706202e-06, "loss": 0.8409, "step": 274190 }, { "epoch": 4.813989009638512, "grad_norm": 0.0783254638578779, "learning_rate": 2.7033271527774795e-06, "loss": 0.8381, "step": 274200 }, { "epoch": 4.8141645745185135, "grad_norm": 0.04624760256893523, "learning_rate": 2.7020018162864567e-06, "loss": 0.8382, "step": 274210 }, { "epoch": 4.814340139398515, "grad_norm": 0.06850580339394562, "learning_rate": 2.7006777252499063e-06, "loss": 0.8329, "step": 274220 }, { "epoch": 4.8145157042785165, "grad_norm": 0.04177568116558813, "learning_rate": 2.6993548796846015e-06, "loss": 0.8376, "step": 274230 }, { "epoch": 4.8146912691585175, "grad_norm": 0.06267301148721093, "learning_rate": 2.698033279607303e-06, "loss": 0.8322, "step": 274240 }, { "epoch": 4.814866834038519, "grad_norm": 0.0716090966351115, "learning_rate": 2.696712925034773e-06, "loss": 0.836, "step": 274250 }, { "epoch": 4.8150423989185205, "grad_norm": 0.05441569158967458, "learning_rate": 2.695393815983729e-06, "loss": 0.8376, "step": 274260 }, { "epoch": 4.8152179637985215, "grad_norm": 0.0499427873689832, "learning_rate": 2.6940759524708887e-06, "loss": 0.8423, "step": 274270 }, { "epoch": 4.815393528678523, "grad_norm": 0.04635721532955182, "learning_rate": 2.6927593345129364e-06, "loss": 0.839, "step": 274280 }, { "epoch": 4.8155690935585245, "grad_norm": 0.04751693951685802, "learning_rate": 2.6914439621265787e-06, "loss": 0.839, "step": 274290 }, { "epoch": 4.8157446584385255, "grad_norm": 0.052849409514471264, "learning_rate": 2.6901298353284576e-06, "loss": 0.8381, "step": 274300 }, { "epoch": 4.815920223318527, "grad_norm": 0.052895367974738504, "learning_rate": 2.6888169541352223e-06, "loss": 0.8466, "step": 274310 }, { "epoch": 4.8160957881985285, "grad_norm": 0.05776267355681738, "learning_rate": 2.687505318563538e-06, "loss": 0.8406, "step": 274320 }, { "epoch": 4.81627135307853, "grad_norm": 0.05314365820847243, "learning_rate": 2.6861949286299893e-06, "loss": 0.8385, "step": 274330 }, { "epoch": 4.816446917958531, "grad_norm": 0.043585922067338356, "learning_rate": 2.6848857843511943e-06, "loss": 0.8457, "step": 274340 }, { "epoch": 4.816622482838533, "grad_norm": 0.04187818110033791, "learning_rate": 2.68357788574374e-06, "loss": 0.8365, "step": 274350 }, { "epoch": 4.816798047718534, "grad_norm": 0.0495627669318699, "learning_rate": 2.6822712328241994e-06, "loss": 0.84, "step": 274360 }, { "epoch": 4.816973612598535, "grad_norm": 0.06441702459249636, "learning_rate": 2.680965825609117e-06, "loss": 0.8451, "step": 274370 }, { "epoch": 4.817149177478537, "grad_norm": 0.04628159894711274, "learning_rate": 2.679661664115043e-06, "loss": 0.8424, "step": 274380 }, { "epoch": 4.817324742358538, "grad_norm": 0.043159504370785, "learning_rate": 2.678358748358509e-06, "loss": 0.8415, "step": 274390 }, { "epoch": 4.81750030723854, "grad_norm": 0.05234719638722515, "learning_rate": 2.677057078356002e-06, "loss": 0.8377, "step": 274400 }, { "epoch": 4.817675872118541, "grad_norm": 0.05365199459993951, "learning_rate": 2.6757566541240204e-06, "loss": 0.8362, "step": 274410 }, { "epoch": 4.817851436998543, "grad_norm": 0.05292267691780359, "learning_rate": 2.6744574756790604e-06, "loss": 0.8363, "step": 274420 }, { "epoch": 4.818027001878544, "grad_norm": 0.045726360428158454, "learning_rate": 2.6731595430375553e-06, "loss": 0.8338, "step": 274430 }, { "epoch": 4.818202566758545, "grad_norm": 0.054804219515055604, "learning_rate": 2.67186285621597e-06, "loss": 0.8334, "step": 274440 }, { "epoch": 4.818378131638547, "grad_norm": 0.048784839968412604, "learning_rate": 2.6705674152307237e-06, "loss": 0.8306, "step": 274450 }, { "epoch": 4.818553696518548, "grad_norm": 0.06639449827794473, "learning_rate": 2.6692732200982387e-06, "loss": 0.8426, "step": 274460 }, { "epoch": 4.81872926139855, "grad_norm": 0.060787795893001394, "learning_rate": 2.667980270834903e-06, "loss": 0.8347, "step": 274470 }, { "epoch": 4.818904826278551, "grad_norm": 0.04789555597792899, "learning_rate": 2.666688567457104e-06, "loss": 0.8435, "step": 274480 }, { "epoch": 4.819080391158552, "grad_norm": 0.05370176370490783, "learning_rate": 2.665398109981207e-06, "loss": 0.8417, "step": 274490 }, { "epoch": 4.819255956038554, "grad_norm": 0.04170856075861181, "learning_rate": 2.664108898423557e-06, "loss": 0.8363, "step": 274500 }, { "epoch": 4.819431520918555, "grad_norm": 0.039758572141325076, "learning_rate": 2.6628209328004977e-06, "loss": 0.8384, "step": 274510 }, { "epoch": 4.819607085798557, "grad_norm": 0.05041261471786215, "learning_rate": 2.6615342131283392e-06, "loss": 0.8334, "step": 274520 }, { "epoch": 4.819782650678558, "grad_norm": 0.07568819229042462, "learning_rate": 2.6602487394233935e-06, "loss": 0.8351, "step": 274530 }, { "epoch": 4.819958215558559, "grad_norm": 0.049969717678388856, "learning_rate": 2.6589645117019383e-06, "loss": 0.837, "step": 274540 }, { "epoch": 4.820133780438561, "grad_norm": 0.05764947460909601, "learning_rate": 2.6576815299802515e-06, "loss": 0.8356, "step": 274550 }, { "epoch": 4.820309345318562, "grad_norm": 0.04926990174381782, "learning_rate": 2.656399794274589e-06, "loss": 0.8412, "step": 274560 }, { "epoch": 4.820484910198564, "grad_norm": 0.05927757439200278, "learning_rate": 2.6551193046011845e-06, "loss": 0.8309, "step": 274570 }, { "epoch": 4.820660475078565, "grad_norm": 0.05294733621179258, "learning_rate": 2.6538400609762728e-06, "loss": 0.8342, "step": 274580 }, { "epoch": 4.820836039958567, "grad_norm": 0.053094900878462956, "learning_rate": 2.6525620634160545e-06, "loss": 0.8387, "step": 274590 }, { "epoch": 4.821011604838568, "grad_norm": 0.05272757671609848, "learning_rate": 2.6512853119367084e-06, "loss": 0.8392, "step": 274600 }, { "epoch": 4.82118716971857, "grad_norm": 0.11747388538621623, "learning_rate": 2.650009806554436e-06, "loss": 0.8363, "step": 274610 }, { "epoch": 4.821362734598571, "grad_norm": 0.05708374536696195, "learning_rate": 2.6487355472853942e-06, "loss": 0.8385, "step": 274620 }, { "epoch": 4.821538299478572, "grad_norm": 0.05367379767065688, "learning_rate": 2.647462534145718e-06, "loss": 0.8421, "step": 274630 }, { "epoch": 4.821713864358574, "grad_norm": 0.0467326387823937, "learning_rate": 2.646190767151543e-06, "loss": 0.838, "step": 274640 }, { "epoch": 4.821889429238575, "grad_norm": 0.05397787101301422, "learning_rate": 2.6449202463189704e-06, "loss": 0.8313, "step": 274650 }, { "epoch": 4.822064994118577, "grad_norm": 0.04691355722044592, "learning_rate": 2.643650971664125e-06, "loss": 0.8366, "step": 274660 }, { "epoch": 4.822240558998578, "grad_norm": 0.06322988722110144, "learning_rate": 2.6423829432030537e-06, "loss": 0.8376, "step": 274670 }, { "epoch": 4.822416123878579, "grad_norm": 0.04833355451809514, "learning_rate": 2.641116160951848e-06, "loss": 0.8404, "step": 274680 }, { "epoch": 4.822591688758581, "grad_norm": 0.042032972414826654, "learning_rate": 2.6398506249265545e-06, "loss": 0.8411, "step": 274690 }, { "epoch": 4.822767253638582, "grad_norm": 0.043564952241920744, "learning_rate": 2.6385863351431982e-06, "loss": 0.8389, "step": 274700 }, { "epoch": 4.822942818518584, "grad_norm": 0.05367997056934279, "learning_rate": 2.6373232916178055e-06, "loss": 0.838, "step": 274710 }, { "epoch": 4.823118383398585, "grad_norm": 0.05210193165190063, "learning_rate": 2.636061494366379e-06, "loss": 0.8443, "step": 274720 }, { "epoch": 4.823293948278586, "grad_norm": 0.04762947654934266, "learning_rate": 2.63480094340491e-06, "loss": 0.84, "step": 274730 }, { "epoch": 4.823469513158588, "grad_norm": 0.04655483271292889, "learning_rate": 2.633541638749359e-06, "loss": 0.8397, "step": 274740 }, { "epoch": 4.823645078038589, "grad_norm": 0.07853533613181898, "learning_rate": 2.6322835804156843e-06, "loss": 0.8446, "step": 274750 }, { "epoch": 4.823820642918591, "grad_norm": 0.0635948624330069, "learning_rate": 2.631026768419835e-06, "loss": 0.8356, "step": 274760 }, { "epoch": 4.823996207798592, "grad_norm": 0.06114650491965545, "learning_rate": 2.6297712027777255e-06, "loss": 0.8323, "step": 274770 }, { "epoch": 4.824171772678594, "grad_norm": 0.04838962814988243, "learning_rate": 2.62851688350526e-06, "loss": 0.8378, "step": 274780 }, { "epoch": 4.824347337558595, "grad_norm": 0.06101627614310327, "learning_rate": 2.6272638106183553e-06, "loss": 0.8431, "step": 274790 }, { "epoch": 4.824522902438596, "grad_norm": 0.05008757658798925, "learning_rate": 2.6260119841328594e-06, "loss": 0.8308, "step": 274800 }, { "epoch": 4.824698467318598, "grad_norm": 0.0476590880729688, "learning_rate": 2.6247614040646435e-06, "loss": 0.8407, "step": 274810 }, { "epoch": 4.824874032198599, "grad_norm": 0.052235951633122606, "learning_rate": 2.6235120704295574e-06, "loss": 0.8395, "step": 274820 }, { "epoch": 4.825049597078601, "grad_norm": 0.04793029521055945, "learning_rate": 2.6222639832434396e-06, "loss": 0.8458, "step": 274830 }, { "epoch": 4.825225161958602, "grad_norm": 0.05576796028477506, "learning_rate": 2.621017142522073e-06, "loss": 0.8432, "step": 274840 }, { "epoch": 4.825400726838604, "grad_norm": 0.049864772133782584, "learning_rate": 2.6197715482812747e-06, "loss": 0.8336, "step": 274850 }, { "epoch": 4.825576291718605, "grad_norm": 0.05422453430335835, "learning_rate": 2.618527200536838e-06, "loss": 0.8409, "step": 274860 }, { "epoch": 4.825751856598606, "grad_norm": 0.09981768138556454, "learning_rate": 2.6172840993045138e-06, "loss": 0.8316, "step": 274870 }, { "epoch": 4.825927421478608, "grad_norm": 0.0575394607189031, "learning_rate": 2.616042244600042e-06, "loss": 0.8374, "step": 274880 }, { "epoch": 4.826102986358609, "grad_norm": 0.05270341199762077, "learning_rate": 2.6148016364391827e-06, "loss": 0.8401, "step": 274890 }, { "epoch": 4.826278551238611, "grad_norm": 0.05862363979357398, "learning_rate": 2.613562274837633e-06, "loss": 0.8355, "step": 274900 }, { "epoch": 4.826454116118612, "grad_norm": 0.05954218983085429, "learning_rate": 2.6123241598111086e-06, "loss": 0.8364, "step": 274910 }, { "epoch": 4.826629680998613, "grad_norm": 0.05281234623293971, "learning_rate": 2.6110872913752958e-06, "loss": 0.8418, "step": 274920 }, { "epoch": 4.826805245878615, "grad_norm": 0.056388847867907446, "learning_rate": 2.6098516695458667e-06, "loss": 0.8409, "step": 274930 }, { "epoch": 4.826980810758616, "grad_norm": 0.0486185356400351, "learning_rate": 2.6086172943384622e-06, "loss": 0.8349, "step": 274940 }, { "epoch": 4.8271563756386175, "grad_norm": 0.08346628289157866, "learning_rate": 2.607384165768745e-06, "loss": 0.8353, "step": 274950 }, { "epoch": 4.827331940518619, "grad_norm": 0.04779139073439817, "learning_rate": 2.606152283852322e-06, "loss": 0.8384, "step": 274960 }, { "epoch": 4.82750750539862, "grad_norm": 0.048852519393326965, "learning_rate": 2.6049216486048014e-06, "loss": 0.8377, "step": 274970 }, { "epoch": 4.8276830702786215, "grad_norm": 0.050257859554560774, "learning_rate": 2.6036922600417905e-06, "loss": 0.8372, "step": 274980 }, { "epoch": 4.827858635158623, "grad_norm": 0.055336681752509755, "learning_rate": 2.602464118178853e-06, "loss": 0.834, "step": 274990 }, { "epoch": 4.8280342000386245, "grad_norm": 0.05434678065255687, "learning_rate": 2.6012372230315518e-06, "loss": 0.8503, "step": 275000 }, { "epoch": 4.8282097649186255, "grad_norm": 0.052684484263078535, "learning_rate": 2.600011574615429e-06, "loss": 0.8405, "step": 275010 }, { "epoch": 4.8283853297986274, "grad_norm": 0.06521905691636634, "learning_rate": 2.5987871729460147e-06, "loss": 0.8384, "step": 275020 }, { "epoch": 4.8285608946786285, "grad_norm": 0.051640154956327164, "learning_rate": 2.5975640180388403e-06, "loss": 0.8392, "step": 275030 }, { "epoch": 4.8287364595586295, "grad_norm": 0.05885807236413016, "learning_rate": 2.5963421099093807e-06, "loss": 0.8363, "step": 275040 }, { "epoch": 4.828912024438631, "grad_norm": 0.05173920681681754, "learning_rate": 2.595121448573123e-06, "loss": 0.8408, "step": 275050 }, { "epoch": 4.8290875893186325, "grad_norm": 0.05597530773778599, "learning_rate": 2.5939020340455316e-06, "loss": 0.8411, "step": 275060 }, { "epoch": 4.829263154198634, "grad_norm": 0.05262749342030055, "learning_rate": 2.592683866342071e-06, "loss": 0.8348, "step": 275070 }, { "epoch": 4.829438719078635, "grad_norm": 0.054545607354975614, "learning_rate": 2.5914669454781623e-06, "loss": 0.8344, "step": 275080 }, { "epoch": 4.829614283958637, "grad_norm": 0.04718014786378896, "learning_rate": 2.5902512714692257e-06, "loss": 0.8359, "step": 275090 }, { "epoch": 4.829789848838638, "grad_norm": 0.05157151816232482, "learning_rate": 2.589036844330671e-06, "loss": 0.8404, "step": 275100 }, { "epoch": 4.829965413718639, "grad_norm": 0.050692196710622056, "learning_rate": 2.587823664077875e-06, "loss": 0.8367, "step": 275110 }, { "epoch": 4.830140978598641, "grad_norm": 0.05372257961884488, "learning_rate": 2.586611730726214e-06, "loss": 0.8305, "step": 275120 }, { "epoch": 4.830316543478642, "grad_norm": 0.048736657223681625, "learning_rate": 2.5854010442910437e-06, "loss": 0.8381, "step": 275130 }, { "epoch": 4.830492108358644, "grad_norm": 0.051002250305920815, "learning_rate": 2.584191604787707e-06, "loss": 0.8339, "step": 275140 }, { "epoch": 4.830667673238645, "grad_norm": 0.04422490906868055, "learning_rate": 2.5829834122315145e-06, "loss": 0.8325, "step": 275150 }, { "epoch": 4.830843238118646, "grad_norm": 0.04397252465971796, "learning_rate": 2.5817764666377887e-06, "loss": 0.8395, "step": 275160 }, { "epoch": 4.831018802998648, "grad_norm": 0.04628613705195507, "learning_rate": 2.5805707680218176e-06, "loss": 0.8349, "step": 275170 }, { "epoch": 4.831194367878649, "grad_norm": 0.049161076470542485, "learning_rate": 2.5793663163988793e-06, "loss": 0.837, "step": 275180 }, { "epoch": 4.831369932758651, "grad_norm": 0.05100449603308824, "learning_rate": 2.57816311178423e-06, "loss": 0.8404, "step": 275190 }, { "epoch": 4.831545497638652, "grad_norm": 0.06363703358697169, "learning_rate": 2.5769611541931137e-06, "loss": 0.8417, "step": 275200 }, { "epoch": 4.831721062518653, "grad_norm": 0.06872160612271697, "learning_rate": 2.575760443640765e-06, "loss": 0.8445, "step": 275210 }, { "epoch": 4.831896627398655, "grad_norm": 0.05203915890964668, "learning_rate": 2.5745609801423953e-06, "loss": 0.8417, "step": 275220 }, { "epoch": 4.832072192278656, "grad_norm": 0.05593835073480344, "learning_rate": 2.5733627637131945e-06, "loss": 0.8337, "step": 275230 }, { "epoch": 4.832247757158658, "grad_norm": 0.047085507357207824, "learning_rate": 2.5721657943683635e-06, "loss": 0.836, "step": 275240 }, { "epoch": 4.832423322038659, "grad_norm": 0.09524147808357304, "learning_rate": 2.570970072123048e-06, "loss": 0.8422, "step": 275250 }, { "epoch": 4.832598886918661, "grad_norm": 0.051942729066597956, "learning_rate": 2.569775596992394e-06, "loss": 0.8378, "step": 275260 }, { "epoch": 4.832774451798662, "grad_norm": 0.05412395162369996, "learning_rate": 2.5685823689915696e-06, "loss": 0.8395, "step": 275270 }, { "epoch": 4.832950016678664, "grad_norm": 0.05948476887440438, "learning_rate": 2.5673903881356545e-06, "loss": 0.8372, "step": 275280 }, { "epoch": 4.833125581558665, "grad_norm": 0.049804838660985826, "learning_rate": 2.5661996544397726e-06, "loss": 0.8388, "step": 275290 }, { "epoch": 4.833301146438666, "grad_norm": 0.05177429703471617, "learning_rate": 2.5650101679190042e-06, "loss": 0.841, "step": 275300 }, { "epoch": 4.833476711318668, "grad_norm": 0.04593440643607699, "learning_rate": 2.563821928588428e-06, "loss": 0.84, "step": 275310 }, { "epoch": 4.833652276198669, "grad_norm": 0.046546517254690646, "learning_rate": 2.562634936463093e-06, "loss": 0.8412, "step": 275320 }, { "epoch": 4.833827841078671, "grad_norm": 0.053945689611284466, "learning_rate": 2.5614491915580333e-06, "loss": 0.8361, "step": 275330 }, { "epoch": 4.834003405958672, "grad_norm": 0.04283580434020563, "learning_rate": 2.560264693888275e-06, "loss": 0.8395, "step": 275340 }, { "epoch": 4.834178970838673, "grad_norm": 0.05618254999070159, "learning_rate": 2.559081443468843e-06, "loss": 0.8389, "step": 275350 }, { "epoch": 4.834354535718675, "grad_norm": 0.06366411461407076, "learning_rate": 2.5578994403146954e-06, "loss": 0.8289, "step": 275360 }, { "epoch": 4.834530100598676, "grad_norm": 0.042819735730527664, "learning_rate": 2.5567186844408472e-06, "loss": 0.8399, "step": 275370 }, { "epoch": 4.834705665478678, "grad_norm": 0.03780578032069957, "learning_rate": 2.5555391758622243e-06, "loss": 0.8345, "step": 275380 }, { "epoch": 4.834881230358679, "grad_norm": 0.05182234473742897, "learning_rate": 2.5543609145937963e-06, "loss": 0.8348, "step": 275390 }, { "epoch": 4.83505679523868, "grad_norm": 0.10085801540654526, "learning_rate": 2.5531839006504785e-06, "loss": 0.8377, "step": 275400 }, { "epoch": 4.835232360118682, "grad_norm": 0.06209971587856139, "learning_rate": 2.5520081340471966e-06, "loss": 0.8398, "step": 275410 }, { "epoch": 4.835407924998683, "grad_norm": 0.05762555703290678, "learning_rate": 2.5508336147988338e-06, "loss": 0.8396, "step": 275420 }, { "epoch": 4.835583489878685, "grad_norm": 0.05130963281937535, "learning_rate": 2.549660342920271e-06, "loss": 0.8459, "step": 275430 }, { "epoch": 4.835759054758686, "grad_norm": 0.04909627370199849, "learning_rate": 2.5484883184263795e-06, "loss": 0.8368, "step": 275440 }, { "epoch": 4.835934619638687, "grad_norm": 0.053860178333573566, "learning_rate": 2.54731754133202e-06, "loss": 0.8356, "step": 275450 }, { "epoch": 4.836110184518689, "grad_norm": 0.04605035020565666, "learning_rate": 2.5461480116520085e-06, "loss": 0.8335, "step": 275460 }, { "epoch": 4.83628574939869, "grad_norm": 0.05883822193550608, "learning_rate": 2.5449797294011716e-06, "loss": 0.8397, "step": 275470 }, { "epoch": 4.836461314278692, "grad_norm": 0.05412926537854916, "learning_rate": 2.543812694594304e-06, "loss": 0.8415, "step": 275480 }, { "epoch": 4.836636879158693, "grad_norm": 0.04851425914999154, "learning_rate": 2.542646907246211e-06, "loss": 0.8478, "step": 275490 }, { "epoch": 4.836812444038695, "grad_norm": 0.04416477076971267, "learning_rate": 2.541482367371642e-06, "loss": 0.8451, "step": 275500 }, { "epoch": 4.836988008918696, "grad_norm": 0.04532828759384817, "learning_rate": 2.5403190749853585e-06, "loss": 0.8311, "step": 275510 }, { "epoch": 4.837163573798698, "grad_norm": 0.08048066112268504, "learning_rate": 2.539157030102111e-06, "loss": 0.8455, "step": 275520 }, { "epoch": 4.837339138678699, "grad_norm": 0.053524413827905466, "learning_rate": 2.5379962327366057e-06, "loss": 0.8289, "step": 275530 }, { "epoch": 4.8375147035587, "grad_norm": 0.04396071282071184, "learning_rate": 2.536836682903571e-06, "loss": 0.8291, "step": 275540 }, { "epoch": 4.837690268438702, "grad_norm": 0.06706345461420463, "learning_rate": 2.5356783806176688e-06, "loss": 0.8358, "step": 275550 }, { "epoch": 4.837865833318703, "grad_norm": 0.05665783822882606, "learning_rate": 2.534521325893605e-06, "loss": 0.8353, "step": 275560 }, { "epoch": 4.838041398198705, "grad_norm": 0.04556393566022263, "learning_rate": 2.5333655187460207e-06, "loss": 0.8456, "step": 275570 }, { "epoch": 4.838216963078706, "grad_norm": 0.05369280877628928, "learning_rate": 2.532210959189567e-06, "loss": 0.8404, "step": 275580 }, { "epoch": 4.838392527958707, "grad_norm": 0.044311803142480566, "learning_rate": 2.5310576472388726e-06, "loss": 0.8455, "step": 275590 }, { "epoch": 4.838568092838709, "grad_norm": 0.049509042018977246, "learning_rate": 2.5299055829085456e-06, "loss": 0.8465, "step": 275600 }, { "epoch": 4.83874365771871, "grad_norm": 0.04594018448852915, "learning_rate": 2.5287547662131925e-06, "loss": 0.8277, "step": 275610 }, { "epoch": 4.838919222598712, "grad_norm": 0.08187710357293193, "learning_rate": 2.5276051971673884e-06, "loss": 0.8315, "step": 275620 }, { "epoch": 4.839094787478713, "grad_norm": 0.04193027949336782, "learning_rate": 2.5264568757856966e-06, "loss": 0.8406, "step": 275630 }, { "epoch": 4.839270352358714, "grad_norm": 0.0473975199940962, "learning_rate": 2.5253098020826577e-06, "loss": 0.8347, "step": 275640 }, { "epoch": 4.839445917238716, "grad_norm": 0.04849877965430718, "learning_rate": 2.524163976072835e-06, "loss": 0.8357, "step": 275650 }, { "epoch": 4.839621482118717, "grad_norm": 0.06155250209652752, "learning_rate": 2.5230193977707163e-06, "loss": 0.8452, "step": 275660 }, { "epoch": 4.839797046998719, "grad_norm": 0.06199273650825144, "learning_rate": 2.5218760671908205e-06, "loss": 0.8327, "step": 275670 }, { "epoch": 4.83997261187872, "grad_norm": 0.057364288380320434, "learning_rate": 2.5207339843476218e-06, "loss": 0.8329, "step": 275680 }, { "epoch": 4.8401481767587216, "grad_norm": 0.14788286082051325, "learning_rate": 2.519593149255608e-06, "loss": 0.8383, "step": 275690 }, { "epoch": 4.840323741638723, "grad_norm": 0.05700793108994467, "learning_rate": 2.5184535619292097e-06, "loss": 0.8309, "step": 275700 }, { "epoch": 4.840499306518724, "grad_norm": 0.06555397919989249, "learning_rate": 2.5173152223828915e-06, "loss": 0.83, "step": 275710 }, { "epoch": 4.8406748713987255, "grad_norm": 0.07986507132894739, "learning_rate": 2.5161781306310525e-06, "loss": 0.844, "step": 275720 }, { "epoch": 4.840850436278727, "grad_norm": 0.05686283488626891, "learning_rate": 2.5150422866881126e-06, "loss": 0.8413, "step": 275730 }, { "epoch": 4.8410260011587285, "grad_norm": 0.05130869893717305, "learning_rate": 2.5139076905684594e-06, "loss": 0.8439, "step": 275740 }, { "epoch": 4.8412015660387295, "grad_norm": 0.07498377017035839, "learning_rate": 2.5127743422864803e-06, "loss": 0.839, "step": 275750 }, { "epoch": 4.8413771309187315, "grad_norm": 0.04660732178528223, "learning_rate": 2.5116422418565185e-06, "loss": 0.8362, "step": 275760 }, { "epoch": 4.8415526957987325, "grad_norm": 0.0458257915781942, "learning_rate": 2.510511389292919e-06, "loss": 0.8319, "step": 275770 }, { "epoch": 4.8417282606787335, "grad_norm": 0.04802905838148354, "learning_rate": 2.509381784610034e-06, "loss": 0.8324, "step": 275780 }, { "epoch": 4.841903825558735, "grad_norm": 0.051097637100227564, "learning_rate": 2.5082534278221432e-06, "loss": 0.8402, "step": 275790 }, { "epoch": 4.8420793904387365, "grad_norm": 0.048735907257290434, "learning_rate": 2.5071263189435562e-06, "loss": 0.8405, "step": 275800 }, { "epoch": 4.842254955318738, "grad_norm": 0.0505762838295391, "learning_rate": 2.5060004579885612e-06, "loss": 0.8317, "step": 275810 }, { "epoch": 4.842430520198739, "grad_norm": 0.055370647702656865, "learning_rate": 2.5048758449714146e-06, "loss": 0.8401, "step": 275820 }, { "epoch": 4.8426060850787405, "grad_norm": 0.044215602476360014, "learning_rate": 2.503752479906361e-06, "loss": 0.8407, "step": 275830 }, { "epoch": 4.842781649958742, "grad_norm": 0.04804937722503847, "learning_rate": 2.5026303628076444e-06, "loss": 0.8462, "step": 275840 }, { "epoch": 4.842957214838743, "grad_norm": 0.06136555097975872, "learning_rate": 2.5015094936894875e-06, "loss": 0.8439, "step": 275850 }, { "epoch": 4.843132779718745, "grad_norm": 0.07742245251607874, "learning_rate": 2.500389872566069e-06, "loss": 0.8375, "step": 275860 }, { "epoch": 4.843308344598746, "grad_norm": 0.05057122235690557, "learning_rate": 2.499271499451601e-06, "loss": 0.8366, "step": 275870 }, { "epoch": 4.843483909478747, "grad_norm": 0.04322844832150202, "learning_rate": 2.498154374360229e-06, "loss": 0.8274, "step": 275880 }, { "epoch": 4.843659474358749, "grad_norm": 0.052340456405622154, "learning_rate": 2.497038497306131e-06, "loss": 0.839, "step": 275890 }, { "epoch": 4.84383503923875, "grad_norm": 0.049263236388273256, "learning_rate": 2.495923868303421e-06, "loss": 0.8381, "step": 275900 }, { "epoch": 4.844010604118752, "grad_norm": 0.060230123523162926, "learning_rate": 2.4948104873662323e-06, "loss": 0.8306, "step": 275910 }, { "epoch": 4.844186168998753, "grad_norm": 0.058678017553847876, "learning_rate": 2.4936983545086896e-06, "loss": 0.8342, "step": 275920 }, { "epoch": 4.844361733878755, "grad_norm": 0.051773741856335785, "learning_rate": 2.49258746974485e-06, "loss": 0.8412, "step": 275930 }, { "epoch": 4.844537298758756, "grad_norm": 0.04648072228877954, "learning_rate": 2.491477833088816e-06, "loss": 0.8382, "step": 275940 }, { "epoch": 4.844712863638758, "grad_norm": 0.04901295136462109, "learning_rate": 2.490369444554633e-06, "loss": 0.8374, "step": 275950 }, { "epoch": 4.844888428518759, "grad_norm": 0.04329450378663491, "learning_rate": 2.4892623041563492e-06, "loss": 0.8298, "step": 275960 }, { "epoch": 4.84506399339876, "grad_norm": 0.05486242725083093, "learning_rate": 2.488156411907989e-06, "loss": 0.8337, "step": 275970 }, { "epoch": 4.845239558278762, "grad_norm": 0.058391220082592336, "learning_rate": 2.4870517678235764e-06, "loss": 0.8374, "step": 275980 }, { "epoch": 4.845415123158763, "grad_norm": 0.05159378883692771, "learning_rate": 2.4859483719170936e-06, "loss": 0.8426, "step": 275990 }, { "epoch": 4.845590688038765, "grad_norm": 0.05160197297247187, "learning_rate": 2.48484622420252e-06, "loss": 0.8404, "step": 276000 }, { "epoch": 4.845766252918766, "grad_norm": 0.06867158476020538, "learning_rate": 2.483745324693826e-06, "loss": 0.8373, "step": 276010 }, { "epoch": 4.845941817798767, "grad_norm": 0.06819101443023562, "learning_rate": 2.48264567340496e-06, "loss": 0.832, "step": 276020 }, { "epoch": 4.846117382678769, "grad_norm": 0.05318365951033614, "learning_rate": 2.481547270349858e-06, "loss": 0.8399, "step": 276030 }, { "epoch": 4.84629294755877, "grad_norm": 0.05632614185900767, "learning_rate": 2.4804501155424244e-06, "loss": 0.8324, "step": 276040 }, { "epoch": 4.846468512438772, "grad_norm": 0.04447132396113028, "learning_rate": 2.479354208996585e-06, "loss": 0.8397, "step": 276050 }, { "epoch": 4.846644077318773, "grad_norm": 0.05314569981349564, "learning_rate": 2.4782595507261995e-06, "loss": 0.8371, "step": 276060 }, { "epoch": 4.846819642198774, "grad_norm": 0.055679117474786245, "learning_rate": 2.477166140745139e-06, "loss": 0.8381, "step": 276070 }, { "epoch": 4.846995207078776, "grad_norm": 0.049484615961554505, "learning_rate": 2.4760739790672735e-06, "loss": 0.8379, "step": 276080 }, { "epoch": 4.847170771958777, "grad_norm": 0.07838242435434019, "learning_rate": 2.4749830657064317e-06, "loss": 0.8357, "step": 276090 }, { "epoch": 4.847346336838779, "grad_norm": 0.0532957241837975, "learning_rate": 2.473893400676439e-06, "loss": 0.8445, "step": 276100 }, { "epoch": 4.84752190171878, "grad_norm": 0.05089799263539613, "learning_rate": 2.4728049839911016e-06, "loss": 0.8379, "step": 276110 }, { "epoch": 4.847697466598781, "grad_norm": 0.05699995316333513, "learning_rate": 2.4717178156642012e-06, "loss": 0.8418, "step": 276120 }, { "epoch": 4.847873031478783, "grad_norm": 0.05349635477107371, "learning_rate": 2.4706318957095217e-06, "loss": 0.8333, "step": 276130 }, { "epoch": 4.848048596358784, "grad_norm": 0.04723675702953651, "learning_rate": 2.469547224140813e-06, "loss": 0.8364, "step": 276140 }, { "epoch": 4.848224161238786, "grad_norm": 0.055297224968775295, "learning_rate": 2.4684638009718353e-06, "loss": 0.8362, "step": 276150 }, { "epoch": 4.848399726118787, "grad_norm": 0.048493980133704095, "learning_rate": 2.4673816262162957e-06, "loss": 0.8377, "step": 276160 }, { "epoch": 4.848575290998789, "grad_norm": 0.04394980395573552, "learning_rate": 2.466300699887922e-06, "loss": 0.8433, "step": 276170 }, { "epoch": 4.84875085587879, "grad_norm": 0.042493998597317166, "learning_rate": 2.465221022000397e-06, "loss": 0.846, "step": 276180 }, { "epoch": 4.848926420758792, "grad_norm": 0.05819088024197446, "learning_rate": 2.464142592567407e-06, "loss": 0.8449, "step": 276190 }, { "epoch": 4.849101985638793, "grad_norm": 0.06545653627919384, "learning_rate": 2.463065411602612e-06, "loss": 0.8377, "step": 276200 }, { "epoch": 4.849277550518794, "grad_norm": 0.05491103641251533, "learning_rate": 2.461989479119664e-06, "loss": 0.8463, "step": 276210 }, { "epoch": 4.849453115398796, "grad_norm": 0.04656565088005356, "learning_rate": 2.4609147951322032e-06, "loss": 0.8364, "step": 276220 }, { "epoch": 4.849628680278797, "grad_norm": 0.04328329642344152, "learning_rate": 2.4598413596538255e-06, "loss": 0.845, "step": 276230 }, { "epoch": 4.849804245158799, "grad_norm": 0.050196473362790116, "learning_rate": 2.4587691726981483e-06, "loss": 0.8357, "step": 276240 }, { "epoch": 4.8499798100388, "grad_norm": 0.056717227431924006, "learning_rate": 2.4576982342787475e-06, "loss": 0.8281, "step": 276250 }, { "epoch": 4.850155374918801, "grad_norm": 0.058811752138621144, "learning_rate": 2.456628544409207e-06, "loss": 0.8309, "step": 276260 }, { "epoch": 4.850330939798803, "grad_norm": 0.0647470095609219, "learning_rate": 2.455560103103057e-06, "loss": 0.8344, "step": 276270 }, { "epoch": 4.850506504678804, "grad_norm": 0.06661353454187778, "learning_rate": 2.4544929103738387e-06, "loss": 0.8321, "step": 276280 }, { "epoch": 4.850682069558806, "grad_norm": 0.04373796452696412, "learning_rate": 2.4534269662351044e-06, "loss": 0.8323, "step": 276290 }, { "epoch": 4.850857634438807, "grad_norm": 0.06027810260059136, "learning_rate": 2.4523622707003185e-06, "loss": 0.8371, "step": 276300 }, { "epoch": 4.851033199318808, "grad_norm": 0.05859440555758353, "learning_rate": 2.451298823782989e-06, "loss": 0.8405, "step": 276310 }, { "epoch": 4.85120876419881, "grad_norm": 0.04331950437295324, "learning_rate": 2.450236625496602e-06, "loss": 0.8401, "step": 276320 }, { "epoch": 4.851384329078811, "grad_norm": 0.06369793618981957, "learning_rate": 2.4491756758546e-06, "loss": 0.8406, "step": 276330 }, { "epoch": 4.851559893958813, "grad_norm": 0.06843181043045812, "learning_rate": 2.448115974870425e-06, "loss": 0.8423, "step": 276340 }, { "epoch": 4.851735458838814, "grad_norm": 0.04344347243344211, "learning_rate": 2.4470575225575087e-06, "loss": 0.8414, "step": 276350 }, { "epoch": 4.851911023718816, "grad_norm": 0.0523883445168756, "learning_rate": 2.44600031892926e-06, "loss": 0.8377, "step": 276360 }, { "epoch": 4.852086588598817, "grad_norm": 0.052692592425111685, "learning_rate": 2.4449443639990772e-06, "loss": 0.8381, "step": 276370 }, { "epoch": 4.852262153478818, "grad_norm": 0.06527803053020698, "learning_rate": 2.4438896577803364e-06, "loss": 0.827, "step": 276380 }, { "epoch": 4.85243771835882, "grad_norm": 0.04824221271245805, "learning_rate": 2.4428362002864036e-06, "loss": 0.8393, "step": 276390 }, { "epoch": 4.852613283238821, "grad_norm": 0.06520037753436923, "learning_rate": 2.441783991530632e-06, "loss": 0.8438, "step": 276400 }, { "epoch": 4.852788848118823, "grad_norm": 0.04809592940544788, "learning_rate": 2.4407330315263334e-06, "loss": 0.8358, "step": 276410 }, { "epoch": 4.852964412998824, "grad_norm": 0.05387894258989316, "learning_rate": 2.439683320286839e-06, "loss": 0.8406, "step": 276420 }, { "epoch": 4.853139977878826, "grad_norm": 0.05940606072828876, "learning_rate": 2.4386348578254484e-06, "loss": 0.8429, "step": 276430 }, { "epoch": 4.853315542758827, "grad_norm": 0.05348443066623755, "learning_rate": 2.4375876441554397e-06, "loss": 0.8425, "step": 276440 }, { "epoch": 4.853491107638828, "grad_norm": 0.045909406474687366, "learning_rate": 2.436541679290089e-06, "loss": 0.8366, "step": 276450 }, { "epoch": 4.8536666725188296, "grad_norm": 0.043549743611623844, "learning_rate": 2.4354969632426414e-06, "loss": 0.8375, "step": 276460 }, { "epoch": 4.853842237398831, "grad_norm": 0.05512808942021891, "learning_rate": 2.4344534960263415e-06, "loss": 0.8495, "step": 276470 }, { "epoch": 4.8540178022788325, "grad_norm": 0.06407925986476999, "learning_rate": 2.4334112776544007e-06, "loss": 0.8399, "step": 276480 }, { "epoch": 4.8541933671588335, "grad_norm": 0.048968263130823385, "learning_rate": 2.43237030814003e-06, "loss": 0.8391, "step": 276490 }, { "epoch": 4.854368932038835, "grad_norm": 0.06542310584154047, "learning_rate": 2.4313305874964304e-06, "loss": 0.8378, "step": 276500 }, { "epoch": 4.8545444969188365, "grad_norm": 0.04884759788808477, "learning_rate": 2.430292115736747e-06, "loss": 0.8395, "step": 276510 }, { "epoch": 4.8547200617988375, "grad_norm": 0.05447029383285702, "learning_rate": 2.4292548928741584e-06, "loss": 0.8415, "step": 276520 }, { "epoch": 4.8548956266788394, "grad_norm": 0.05073164949252843, "learning_rate": 2.4282189189217985e-06, "loss": 0.8411, "step": 276530 }, { "epoch": 4.8550711915588405, "grad_norm": 0.05319537321030178, "learning_rate": 2.4271841938928028e-06, "loss": 0.8376, "step": 276540 }, { "epoch": 4.8552467564388415, "grad_norm": 0.05098083977395918, "learning_rate": 2.426150717800272e-06, "loss": 0.8371, "step": 276550 }, { "epoch": 4.855422321318843, "grad_norm": 0.049546691505777715, "learning_rate": 2.4251184906572974e-06, "loss": 0.8391, "step": 276560 }, { "epoch": 4.8555978861988445, "grad_norm": 0.04314729154365082, "learning_rate": 2.4240875124769687e-06, "loss": 0.8439, "step": 276570 }, { "epoch": 4.855773451078846, "grad_norm": 0.05368713802787919, "learning_rate": 2.4230577832723444e-06, "loss": 0.8334, "step": 276580 }, { "epoch": 4.855949015958847, "grad_norm": 0.054508763806073175, "learning_rate": 2.42202930305647e-06, "loss": 0.8364, "step": 276590 }, { "epoch": 4.856124580838849, "grad_norm": 0.05892810592262698, "learning_rate": 2.421002071842371e-06, "loss": 0.8326, "step": 276600 }, { "epoch": 4.85630014571885, "grad_norm": 0.056607946749304826, "learning_rate": 2.419976089643071e-06, "loss": 0.8336, "step": 276610 }, { "epoch": 4.856475710598852, "grad_norm": 0.05416060055113515, "learning_rate": 2.4189513564715735e-06, "loss": 0.8464, "step": 276620 }, { "epoch": 4.856651275478853, "grad_norm": 0.05159538645805233, "learning_rate": 2.4179278723408482e-06, "loss": 0.8494, "step": 276630 }, { "epoch": 4.856826840358854, "grad_norm": 0.04883583262373563, "learning_rate": 2.416905637263875e-06, "loss": 0.8307, "step": 276640 }, { "epoch": 4.857002405238856, "grad_norm": 0.06954892570127008, "learning_rate": 2.415884651253591e-06, "loss": 0.8387, "step": 276650 }, { "epoch": 4.857177970118857, "grad_norm": 0.06553701484895307, "learning_rate": 2.4148649143229545e-06, "loss": 0.8376, "step": 276660 }, { "epoch": 4.857353534998859, "grad_norm": 0.07778962064569517, "learning_rate": 2.4138464264848693e-06, "loss": 0.8422, "step": 276670 }, { "epoch": 4.85752909987886, "grad_norm": 0.057663401924703164, "learning_rate": 2.4128291877522395e-06, "loss": 0.8396, "step": 276680 }, { "epoch": 4.857704664758861, "grad_norm": 0.04161193910815434, "learning_rate": 2.4118131981379573e-06, "loss": 0.8371, "step": 276690 }, { "epoch": 4.857880229638863, "grad_norm": 0.0557763765595428, "learning_rate": 2.4107984576549044e-06, "loss": 0.8411, "step": 276700 }, { "epoch": 4.858055794518864, "grad_norm": 0.04779868596290374, "learning_rate": 2.409784966315919e-06, "loss": 0.8338, "step": 276710 }, { "epoch": 4.858231359398866, "grad_norm": 0.04418221175500474, "learning_rate": 2.4087727241338603e-06, "loss": 0.8322, "step": 276720 }, { "epoch": 4.858406924278867, "grad_norm": 0.058312347057849406, "learning_rate": 2.4077617311215444e-06, "loss": 0.836, "step": 276730 }, { "epoch": 4.858582489158868, "grad_norm": 0.04641820138388282, "learning_rate": 2.4067519872917867e-06, "loss": 0.8516, "step": 276740 }, { "epoch": 4.85875805403887, "grad_norm": 0.03865854099883858, "learning_rate": 2.4057434926573703e-06, "loss": 0.8435, "step": 276750 }, { "epoch": 4.858933618918871, "grad_norm": 0.05442460674651128, "learning_rate": 2.4047362472310777e-06, "loss": 0.8325, "step": 276760 }, { "epoch": 4.859109183798873, "grad_norm": 0.05994117320073124, "learning_rate": 2.403730251025681e-06, "loss": 0.8488, "step": 276770 }, { "epoch": 4.859284748678874, "grad_norm": 0.04911111664374036, "learning_rate": 2.402725504053908e-06, "loss": 0.8315, "step": 276780 }, { "epoch": 4.859460313558875, "grad_norm": 0.04960633629942295, "learning_rate": 2.401722006328497e-06, "loss": 0.8359, "step": 276790 }, { "epoch": 4.859635878438877, "grad_norm": 0.04760099398077638, "learning_rate": 2.4007197578621763e-06, "loss": 0.8355, "step": 276800 }, { "epoch": 4.859811443318878, "grad_norm": 0.0649158384603694, "learning_rate": 2.3997187586676294e-06, "loss": 0.8306, "step": 276810 }, { "epoch": 4.85998700819888, "grad_norm": 0.06024094945675965, "learning_rate": 2.398719008757541e-06, "loss": 0.8415, "step": 276820 }, { "epoch": 4.860162573078881, "grad_norm": 0.04982201705123357, "learning_rate": 2.397720508144572e-06, "loss": 0.8381, "step": 276830 }, { "epoch": 4.860338137958883, "grad_norm": 0.061975843147084354, "learning_rate": 2.3967232568413955e-06, "loss": 0.8438, "step": 276840 }, { "epoch": 4.860513702838884, "grad_norm": 0.0662055213992838, "learning_rate": 2.3957272548606297e-06, "loss": 0.8358, "step": 276850 }, { "epoch": 4.860689267718886, "grad_norm": 0.05001104044130969, "learning_rate": 2.394732502214892e-06, "loss": 0.833, "step": 276860 }, { "epoch": 4.860864832598887, "grad_norm": 0.05260662935296916, "learning_rate": 2.39373899891679e-06, "loss": 0.8372, "step": 276870 }, { "epoch": 4.861040397478888, "grad_norm": 0.053319419348396374, "learning_rate": 2.392746744978919e-06, "loss": 0.8479, "step": 276880 }, { "epoch": 4.86121596235889, "grad_norm": 0.06306468142262597, "learning_rate": 2.3917557404138316e-06, "loss": 0.8381, "step": 276890 }, { "epoch": 4.861391527238891, "grad_norm": 0.054155415269286154, "learning_rate": 2.390765985234112e-06, "loss": 0.8365, "step": 276900 }, { "epoch": 4.861567092118893, "grad_norm": 0.04555726628157994, "learning_rate": 2.38977747945228e-06, "loss": 0.8373, "step": 276910 }, { "epoch": 4.861742656998894, "grad_norm": 0.058150079038495416, "learning_rate": 2.3887902230808647e-06, "loss": 0.837, "step": 276920 }, { "epoch": 4.861918221878895, "grad_norm": 0.04850078884688074, "learning_rate": 2.3878042161323746e-06, "loss": 0.8325, "step": 276930 }, { "epoch": 4.862093786758897, "grad_norm": 0.04689012869985865, "learning_rate": 2.3868194586193067e-06, "loss": 0.8344, "step": 276940 }, { "epoch": 4.862269351638898, "grad_norm": 0.04887574303728333, "learning_rate": 2.385835950554136e-06, "loss": 0.8415, "step": 276950 }, { "epoch": 4.8624449165189, "grad_norm": 0.04966807185123525, "learning_rate": 2.3848536919493152e-06, "loss": 0.8276, "step": 276960 }, { "epoch": 4.862620481398901, "grad_norm": 0.05444379217321977, "learning_rate": 2.383872682817309e-06, "loss": 0.8428, "step": 276970 }, { "epoch": 4.862796046278902, "grad_norm": 0.06021040823578459, "learning_rate": 2.382892923170537e-06, "loss": 0.839, "step": 276980 }, { "epoch": 4.862971611158904, "grad_norm": 0.05416367209313438, "learning_rate": 2.381914413021397e-06, "loss": 0.8402, "step": 276990 }, { "epoch": 4.863147176038905, "grad_norm": 0.043157651613715986, "learning_rate": 2.3809371523823096e-06, "loss": 0.8306, "step": 277000 }, { "epoch": 4.863322740918907, "grad_norm": 0.0511080186433846, "learning_rate": 2.3799611412656508e-06, "loss": 0.8352, "step": 277010 }, { "epoch": 4.863498305798908, "grad_norm": 0.050029273164310425, "learning_rate": 2.378986379683786e-06, "loss": 0.8412, "step": 277020 }, { "epoch": 4.86367387067891, "grad_norm": 0.05285014175964713, "learning_rate": 2.3780128676490573e-06, "loss": 0.8438, "step": 277030 }, { "epoch": 4.863849435558911, "grad_norm": 0.045815316761178476, "learning_rate": 2.3770406051738197e-06, "loss": 0.8333, "step": 277040 }, { "epoch": 4.864025000438912, "grad_norm": 0.056458357979612246, "learning_rate": 2.3760695922703727e-06, "loss": 0.835, "step": 277050 }, { "epoch": 4.864200565318914, "grad_norm": 0.05373996469512881, "learning_rate": 2.375099828951014e-06, "loss": 0.8388, "step": 277060 }, { "epoch": 4.864376130198915, "grad_norm": 0.052202125085458864, "learning_rate": 2.374131315228055e-06, "loss": 0.8431, "step": 277070 }, { "epoch": 4.864551695078917, "grad_norm": 0.06248961676193048, "learning_rate": 2.3731640511137507e-06, "loss": 0.8316, "step": 277080 }, { "epoch": 4.864727259958918, "grad_norm": 0.06417479145541581, "learning_rate": 2.3721980366203563e-06, "loss": 0.8343, "step": 277090 }, { "epoch": 4.86490282483892, "grad_norm": 0.05279870527726918, "learning_rate": 2.3712332717601154e-06, "loss": 0.8334, "step": 277100 }, { "epoch": 4.865078389718921, "grad_norm": 0.05974434683991479, "learning_rate": 2.370269756545251e-06, "loss": 0.8389, "step": 277110 }, { "epoch": 4.865253954598922, "grad_norm": 0.04615149423052299, "learning_rate": 2.369307490987974e-06, "loss": 0.8435, "step": 277120 }, { "epoch": 4.865429519478924, "grad_norm": 0.049617499921966346, "learning_rate": 2.3683464751004627e-06, "loss": 0.8342, "step": 277130 }, { "epoch": 4.865605084358925, "grad_norm": 0.0760057223311987, "learning_rate": 2.367386708894917e-06, "loss": 0.836, "step": 277140 }, { "epoch": 4.865780649238927, "grad_norm": 0.04984481165671735, "learning_rate": 2.366428192383483e-06, "loss": 0.8417, "step": 277150 }, { "epoch": 4.865956214118928, "grad_norm": 0.040523243208213815, "learning_rate": 2.3654709255783053e-06, "loss": 0.8451, "step": 277160 }, { "epoch": 4.866131778998929, "grad_norm": 0.04736242142151796, "learning_rate": 2.364514908491507e-06, "loss": 0.8349, "step": 277170 }, { "epoch": 4.866307343878931, "grad_norm": 0.04034271552790983, "learning_rate": 2.363560141135223e-06, "loss": 0.8356, "step": 277180 }, { "epoch": 4.866482908758932, "grad_norm": 0.052032715564242805, "learning_rate": 2.3626066235215216e-06, "loss": 0.8378, "step": 277190 }, { "epoch": 4.866658473638934, "grad_norm": 0.05369074275971313, "learning_rate": 2.3616543556625037e-06, "loss": 0.8416, "step": 277200 }, { "epoch": 4.866834038518935, "grad_norm": 0.04855708869916052, "learning_rate": 2.360703337570238e-06, "loss": 0.8371, "step": 277210 }, { "epoch": 4.867009603398936, "grad_norm": 0.04900107812927804, "learning_rate": 2.3597535692567487e-06, "loss": 0.8455, "step": 277220 }, { "epoch": 4.8671851682789375, "grad_norm": 0.06743829085559142, "learning_rate": 2.3588050507340935e-06, "loss": 0.84, "step": 277230 }, { "epoch": 4.867360733158939, "grad_norm": 0.04454336247087602, "learning_rate": 2.357857782014285e-06, "loss": 0.8295, "step": 277240 }, { "epoch": 4.8675362980389405, "grad_norm": 0.045380465725132275, "learning_rate": 2.3569117631093154e-06, "loss": 0.8413, "step": 277250 }, { "epoch": 4.8677118629189415, "grad_norm": 0.04949551295321744, "learning_rate": 2.355966994031187e-06, "loss": 0.8429, "step": 277260 }, { "epoch": 4.8678874277989435, "grad_norm": 0.047766221560478966, "learning_rate": 2.3550234747918578e-06, "loss": 0.8416, "step": 277270 }, { "epoch": 4.8680629926789445, "grad_norm": 0.0452426218785097, "learning_rate": 2.3540812054032863e-06, "loss": 0.835, "step": 277280 }, { "epoch": 4.8682385575589455, "grad_norm": 0.042602584699881214, "learning_rate": 2.3531401858774206e-06, "loss": 0.8444, "step": 277290 }, { "epoch": 4.8684141224389474, "grad_norm": 0.04368750948059472, "learning_rate": 2.3522004162261637e-06, "loss": 0.8325, "step": 277300 }, { "epoch": 4.8685896873189485, "grad_norm": 0.05845885531055001, "learning_rate": 2.3512618964614298e-06, "loss": 0.8348, "step": 277310 }, { "epoch": 4.86876525219895, "grad_norm": 0.04792462454968859, "learning_rate": 2.3503246265951227e-06, "loss": 0.8375, "step": 277320 }, { "epoch": 4.868940817078951, "grad_norm": 0.052606405786951436, "learning_rate": 2.349388606639102e-06, "loss": 0.8417, "step": 277330 }, { "epoch": 4.869116381958953, "grad_norm": 0.048216316734338495, "learning_rate": 2.348453836605238e-06, "loss": 0.8291, "step": 277340 }, { "epoch": 4.869291946838954, "grad_norm": 0.06070337181364155, "learning_rate": 2.3475203165053792e-06, "loss": 0.8361, "step": 277350 }, { "epoch": 4.869467511718955, "grad_norm": 0.06353469194366601, "learning_rate": 2.3465880463513304e-06, "loss": 0.8403, "step": 277360 }, { "epoch": 4.869643076598957, "grad_norm": 0.056728655461388784, "learning_rate": 2.3456570261549177e-06, "loss": 0.8324, "step": 277370 }, { "epoch": 4.869818641478958, "grad_norm": 0.04684084792774294, "learning_rate": 2.3447272559279462e-06, "loss": 0.839, "step": 277380 }, { "epoch": 4.86999420635896, "grad_norm": 0.05376680040491101, "learning_rate": 2.3437987356821877e-06, "loss": 0.8422, "step": 277390 }, { "epoch": 4.870169771238961, "grad_norm": 0.05499766185130584, "learning_rate": 2.3428714654294014e-06, "loss": 0.8354, "step": 277400 }, { "epoch": 4.870345336118962, "grad_norm": 0.060098283066076286, "learning_rate": 2.3419454451813387e-06, "loss": 0.8336, "step": 277410 }, { "epoch": 4.870520900998964, "grad_norm": 0.04528530433418624, "learning_rate": 2.341020674949748e-06, "loss": 0.842, "step": 277420 }, { "epoch": 4.870696465878965, "grad_norm": 0.04518100583161324, "learning_rate": 2.340097154746313e-06, "loss": 0.8308, "step": 277430 }, { "epoch": 4.870872030758967, "grad_norm": 0.06289203051735735, "learning_rate": 2.339174884582773e-06, "loss": 0.8405, "step": 277440 }, { "epoch": 4.871047595638968, "grad_norm": 0.056291950914469535, "learning_rate": 2.338253864470789e-06, "loss": 0.8417, "step": 277450 }, { "epoch": 4.871223160518969, "grad_norm": 0.043895817237339485, "learning_rate": 2.3373340944220336e-06, "loss": 0.8363, "step": 277460 }, { "epoch": 4.871398725398971, "grad_norm": 0.04954573930877574, "learning_rate": 2.3364155744481695e-06, "loss": 0.8372, "step": 277470 }, { "epoch": 4.871574290278972, "grad_norm": 0.06686163789769836, "learning_rate": 2.3354983045608244e-06, "loss": 0.8471, "step": 277480 }, { "epoch": 4.871749855158974, "grad_norm": 0.05785954781628937, "learning_rate": 2.3345822847716273e-06, "loss": 0.8401, "step": 277490 }, { "epoch": 4.871925420038975, "grad_norm": 0.06631210507120402, "learning_rate": 2.333667515092185e-06, "loss": 0.831, "step": 277500 }, { "epoch": 4.872100984918977, "grad_norm": 0.04417037874497076, "learning_rate": 2.3327539955340712e-06, "loss": 0.8348, "step": 277510 }, { "epoch": 4.872276549798978, "grad_norm": 0.05113445704410173, "learning_rate": 2.3318417261088922e-06, "loss": 0.8405, "step": 277520 }, { "epoch": 4.87245211467898, "grad_norm": 0.04881209013074881, "learning_rate": 2.3309307068281786e-06, "loss": 0.8352, "step": 277530 }, { "epoch": 4.872627679558981, "grad_norm": 0.05819976046334759, "learning_rate": 2.3300209377034814e-06, "loss": 0.8274, "step": 277540 }, { "epoch": 4.872803244438982, "grad_norm": 0.0570634228875145, "learning_rate": 2.3291124187463307e-06, "loss": 0.8372, "step": 277550 }, { "epoch": 4.872978809318984, "grad_norm": 0.05175751448350101, "learning_rate": 2.3282051499682344e-06, "loss": 0.8368, "step": 277560 }, { "epoch": 4.873154374198985, "grad_norm": 0.05402693145926012, "learning_rate": 2.3272991313806893e-06, "loss": 0.8353, "step": 277570 }, { "epoch": 4.873329939078987, "grad_norm": 0.059394982057102745, "learning_rate": 2.326394362995181e-06, "loss": 0.8454, "step": 277580 }, { "epoch": 4.873505503958988, "grad_norm": 0.043150555593406865, "learning_rate": 2.3254908448231625e-06, "loss": 0.8386, "step": 277590 }, { "epoch": 4.873681068838989, "grad_norm": 0.054015204574329755, "learning_rate": 2.3245885768760864e-06, "loss": 0.8336, "step": 277600 }, { "epoch": 4.873856633718991, "grad_norm": 0.04318843061781092, "learning_rate": 2.3236875591653834e-06, "loss": 0.8249, "step": 277610 }, { "epoch": 4.874032198598992, "grad_norm": 0.058468849722661635, "learning_rate": 2.3227877917024744e-06, "loss": 0.8428, "step": 277620 }, { "epoch": 4.874207763478994, "grad_norm": 0.0663196693819406, "learning_rate": 2.3218892744987453e-06, "loss": 0.8384, "step": 277630 }, { "epoch": 4.874383328358995, "grad_norm": 0.04552678346590824, "learning_rate": 2.320992007565594e-06, "loss": 0.8404, "step": 277640 }, { "epoch": 4.874558893238996, "grad_norm": 0.04788939158724716, "learning_rate": 2.320095990914397e-06, "loss": 0.8316, "step": 277650 }, { "epoch": 4.874734458118998, "grad_norm": 0.08029834139414921, "learning_rate": 2.3192012245564858e-06, "loss": 0.8391, "step": 277660 }, { "epoch": 4.874910022998999, "grad_norm": 0.0489498986627386, "learning_rate": 2.3183077085032035e-06, "loss": 0.8396, "step": 277670 }, { "epoch": 4.875085587879001, "grad_norm": 0.05406835086921593, "learning_rate": 2.3174154427658816e-06, "loss": 0.8328, "step": 277680 }, { "epoch": 4.875261152759002, "grad_norm": 0.049601886440406244, "learning_rate": 2.31652442735582e-06, "loss": 0.8414, "step": 277690 }, { "epoch": 4.875436717639004, "grad_norm": 0.0588710816333404, "learning_rate": 2.315634662284306e-06, "loss": 0.8343, "step": 277700 }, { "epoch": 4.875612282519005, "grad_norm": 0.05438950186616897, "learning_rate": 2.3147461475626055e-06, "loss": 0.8367, "step": 277710 }, { "epoch": 4.875787847399006, "grad_norm": 0.0483107067094726, "learning_rate": 2.3138588832019954e-06, "loss": 0.8356, "step": 277720 }, { "epoch": 4.875963412279008, "grad_norm": 0.0620369504572558, "learning_rate": 2.3129728692136982e-06, "loss": 0.8379, "step": 277730 }, { "epoch": 4.876138977159009, "grad_norm": 0.061341783916448143, "learning_rate": 2.312088105608947e-06, "loss": 0.8396, "step": 277740 }, { "epoch": 4.876314542039011, "grad_norm": 0.0554070363706951, "learning_rate": 2.3112045923989522e-06, "loss": 0.832, "step": 277750 }, { "epoch": 4.876490106919012, "grad_norm": 0.05895349014674556, "learning_rate": 2.3103223295949146e-06, "loss": 0.8434, "step": 277760 }, { "epoch": 4.876665671799014, "grad_norm": 0.0571989668218448, "learning_rate": 2.309441317208011e-06, "loss": 0.8372, "step": 277770 }, { "epoch": 4.876841236679015, "grad_norm": 0.06320315504766369, "learning_rate": 2.3085615552493883e-06, "loss": 0.8396, "step": 277780 }, { "epoch": 4.877016801559016, "grad_norm": 0.057099118963570825, "learning_rate": 2.307683043730212e-06, "loss": 0.8409, "step": 277790 }, { "epoch": 4.877192366439018, "grad_norm": 0.08663020941453886, "learning_rate": 2.3068057826616057e-06, "loss": 0.8387, "step": 277800 }, { "epoch": 4.877367931319019, "grad_norm": 0.04515207942027647, "learning_rate": 2.305929772054682e-06, "loss": 0.8394, "step": 277810 }, { "epoch": 4.877543496199021, "grad_norm": 0.05229526692854949, "learning_rate": 2.305055011920541e-06, "loss": 0.8417, "step": 277820 }, { "epoch": 4.877719061079022, "grad_norm": 0.05088786043337373, "learning_rate": 2.3041815022702746e-06, "loss": 0.834, "step": 277830 }, { "epoch": 4.877894625959023, "grad_norm": 0.052798702070406524, "learning_rate": 2.303309243114938e-06, "loss": 0.8394, "step": 277840 }, { "epoch": 4.878070190839025, "grad_norm": 0.04268913493797968, "learning_rate": 2.302438234465589e-06, "loss": 0.842, "step": 277850 }, { "epoch": 4.878245755719026, "grad_norm": 0.04385788511983237, "learning_rate": 2.301568476333263e-06, "loss": 0.8315, "step": 277860 }, { "epoch": 4.878421320599028, "grad_norm": 0.049231253225883904, "learning_rate": 2.3006999687289842e-06, "loss": 0.8418, "step": 277870 }, { "epoch": 4.878596885479029, "grad_norm": 0.05242012726370273, "learning_rate": 2.299832711663744e-06, "loss": 0.837, "step": 277880 }, { "epoch": 4.87877245035903, "grad_norm": 0.17656638631343696, "learning_rate": 2.2989667051485553e-06, "loss": 0.8383, "step": 277890 }, { "epoch": 4.878948015239032, "grad_norm": 0.043898826029743675, "learning_rate": 2.298101949194354e-06, "loss": 0.8334, "step": 277900 }, { "epoch": 4.879123580119033, "grad_norm": 0.04843320078922225, "learning_rate": 2.297238443812132e-06, "loss": 0.8471, "step": 277910 }, { "epoch": 4.879299144999035, "grad_norm": 0.04344077113600318, "learning_rate": 2.296376189012814e-06, "loss": 0.8436, "step": 277920 }, { "epoch": 4.879474709879036, "grad_norm": 0.052921927643427784, "learning_rate": 2.2955151848073146e-06, "loss": 0.8303, "step": 277930 }, { "epoch": 4.879650274759038, "grad_norm": 0.04492382557362727, "learning_rate": 2.294655431206559e-06, "loss": 0.8482, "step": 277940 }, { "epoch": 4.879825839639039, "grad_norm": 0.05489876132898841, "learning_rate": 2.2937969282214392e-06, "loss": 0.8389, "step": 277950 }, { "epoch": 4.88000140451904, "grad_norm": 0.07227579802417675, "learning_rate": 2.2929396758628374e-06, "loss": 0.8351, "step": 277960 }, { "epoch": 4.8801769693990416, "grad_norm": 0.0633064561175574, "learning_rate": 2.29208367414159e-06, "loss": 0.842, "step": 277970 }, { "epoch": 4.880352534279043, "grad_norm": 0.054815895606689244, "learning_rate": 2.291228923068567e-06, "loss": 0.8355, "step": 277980 }, { "epoch": 4.8805280991590445, "grad_norm": 0.06498866483678091, "learning_rate": 2.2903754226545957e-06, "loss": 0.8357, "step": 277990 }, { "epoch": 4.8807036640390455, "grad_norm": 0.055785782429241364, "learning_rate": 2.2895231729104804e-06, "loss": 0.8397, "step": 278000 }, { "epoch": 4.8808792289190475, "grad_norm": 0.04100923565122837, "learning_rate": 2.288672173847025e-06, "loss": 0.84, "step": 278010 }, { "epoch": 4.8810547937990485, "grad_norm": 0.05900372105573216, "learning_rate": 2.2878224254750117e-06, "loss": 0.8418, "step": 278020 }, { "epoch": 4.8812303586790495, "grad_norm": 0.09698479942102946, "learning_rate": 2.2869739278052122e-06, "loss": 0.8322, "step": 278030 }, { "epoch": 4.8814059235590515, "grad_norm": 0.040062980763133214, "learning_rate": 2.2861266808483656e-06, "loss": 0.8499, "step": 278040 }, { "epoch": 4.8815814884390525, "grad_norm": 0.04289182117816468, "learning_rate": 2.2852806846152087e-06, "loss": 0.8358, "step": 278050 }, { "epoch": 4.881757053319054, "grad_norm": 0.04731217538521671, "learning_rate": 2.2844359391164706e-06, "loss": 0.8471, "step": 278060 }, { "epoch": 4.881932618199055, "grad_norm": 0.044729003322457925, "learning_rate": 2.283592444362844e-06, "loss": 0.8376, "step": 278070 }, { "epoch": 4.8821081830790565, "grad_norm": 0.06041796705566438, "learning_rate": 2.282750200365025e-06, "loss": 0.8501, "step": 278080 }, { "epoch": 4.882283747959058, "grad_norm": 0.05366805393258608, "learning_rate": 2.2819092071336747e-06, "loss": 0.8335, "step": 278090 }, { "epoch": 4.882459312839059, "grad_norm": 0.05063609869302914, "learning_rate": 2.2810694646794537e-06, "loss": 0.8377, "step": 278100 }, { "epoch": 4.882634877719061, "grad_norm": 0.04158450008760223, "learning_rate": 2.280230973013002e-06, "loss": 0.8449, "step": 278110 }, { "epoch": 4.882810442599062, "grad_norm": 0.04754579419919649, "learning_rate": 2.279393732144949e-06, "loss": 0.8359, "step": 278120 }, { "epoch": 4.882986007479063, "grad_norm": 0.055576526504273645, "learning_rate": 2.2785577420858886e-06, "loss": 0.8345, "step": 278130 }, { "epoch": 4.883161572359065, "grad_norm": 0.04848045112989425, "learning_rate": 2.277723002846428e-06, "loss": 0.8482, "step": 278140 }, { "epoch": 4.883337137239066, "grad_norm": 0.047667219301417825, "learning_rate": 2.27688951443713e-06, "loss": 0.8351, "step": 278150 }, { "epoch": 4.883512702119068, "grad_norm": 0.0482325798303706, "learning_rate": 2.2760572768685685e-06, "loss": 0.8289, "step": 278160 }, { "epoch": 4.883688266999069, "grad_norm": 0.06902302981277714, "learning_rate": 2.2752262901512827e-06, "loss": 0.8427, "step": 278170 }, { "epoch": 4.883863831879071, "grad_norm": 0.04864813090620879, "learning_rate": 2.2743965542957926e-06, "loss": 0.8385, "step": 278180 }, { "epoch": 4.884039396759072, "grad_norm": 0.049049771387816105, "learning_rate": 2.2735680693126274e-06, "loss": 0.8337, "step": 278190 }, { "epoch": 4.884214961639074, "grad_norm": 0.06349549147189137, "learning_rate": 2.2727408352122722e-06, "loss": 0.8404, "step": 278200 }, { "epoch": 4.884390526519075, "grad_norm": 0.047563218056591396, "learning_rate": 2.2719148520052134e-06, "loss": 0.8395, "step": 278210 }, { "epoch": 4.884566091399076, "grad_norm": 0.047378611869736714, "learning_rate": 2.2710901197019138e-06, "loss": 0.8392, "step": 278220 }, { "epoch": 4.884741656279078, "grad_norm": 0.052731741955891684, "learning_rate": 2.270266638312826e-06, "loss": 0.8385, "step": 278230 }, { "epoch": 4.884917221159079, "grad_norm": 0.05264190349071234, "learning_rate": 2.269444407848371e-06, "loss": 0.8425, "step": 278240 }, { "epoch": 4.885092786039081, "grad_norm": 0.058138778176642224, "learning_rate": 2.2686234283189884e-06, "loss": 0.8425, "step": 278250 }, { "epoch": 4.885268350919082, "grad_norm": 0.044505415544785765, "learning_rate": 2.267803699735067e-06, "loss": 0.8388, "step": 278260 }, { "epoch": 4.885443915799083, "grad_norm": 0.05650075823860792, "learning_rate": 2.266985222106992e-06, "loss": 0.8374, "step": 278270 }, { "epoch": 4.885619480679085, "grad_norm": 0.06539298645253816, "learning_rate": 2.2661679954451286e-06, "loss": 0.8345, "step": 278280 }, { "epoch": 4.885795045559086, "grad_norm": 0.054282053828018983, "learning_rate": 2.265352019759853e-06, "loss": 0.8369, "step": 278290 }, { "epoch": 4.885970610439088, "grad_norm": 0.056573522906115475, "learning_rate": 2.264537295061485e-06, "loss": 0.843, "step": 278300 }, { "epoch": 4.886146175319089, "grad_norm": 0.06748099569611775, "learning_rate": 2.263723821360346e-06, "loss": 0.8414, "step": 278310 }, { "epoch": 4.88632174019909, "grad_norm": 0.04400455933173876, "learning_rate": 2.2629115986667567e-06, "loss": 0.8343, "step": 278320 }, { "epoch": 4.886497305079092, "grad_norm": 0.054460474371262564, "learning_rate": 2.2621006269909937e-06, "loss": 0.8379, "step": 278330 }, { "epoch": 4.886672869959093, "grad_norm": 0.04205758264410936, "learning_rate": 2.2612909063433445e-06, "loss": 0.8373, "step": 278340 }, { "epoch": 4.886848434839095, "grad_norm": 0.055217750565734434, "learning_rate": 2.260482436734064e-06, "loss": 0.8409, "step": 278350 }, { "epoch": 4.887023999719096, "grad_norm": 0.06307567997969087, "learning_rate": 2.2596752181733858e-06, "loss": 0.8369, "step": 278360 }, { "epoch": 4.887199564599097, "grad_norm": 0.07310672063884452, "learning_rate": 2.258869250671553e-06, "loss": 0.8259, "step": 278370 }, { "epoch": 4.887375129479099, "grad_norm": 0.06162925353423119, "learning_rate": 2.258064534238765e-06, "loss": 0.8334, "step": 278380 }, { "epoch": 4.8875506943591, "grad_norm": 0.05127352227780282, "learning_rate": 2.257261068885234e-06, "loss": 0.8472, "step": 278390 }, { "epoch": 4.887726259239102, "grad_norm": 0.04409399464831387, "learning_rate": 2.2564588546211144e-06, "loss": 0.8351, "step": 278400 }, { "epoch": 4.887901824119103, "grad_norm": 0.050159388711853374, "learning_rate": 2.2556578914565957e-06, "loss": 0.8359, "step": 278410 }, { "epoch": 4.888077388999105, "grad_norm": 0.045226167319418295, "learning_rate": 2.2548581794018123e-06, "loss": 0.8391, "step": 278420 }, { "epoch": 4.888252953879106, "grad_norm": 0.050429314250120115, "learning_rate": 2.2540597184669086e-06, "loss": 0.8391, "step": 278430 }, { "epoch": 4.888428518759108, "grad_norm": 0.052557606607808734, "learning_rate": 2.2532625086619745e-06, "loss": 0.8409, "step": 278440 }, { "epoch": 4.888604083639109, "grad_norm": 0.04599643086267006, "learning_rate": 2.2524665499971434e-06, "loss": 0.8411, "step": 278450 }, { "epoch": 4.88877964851911, "grad_norm": 0.09521453958415674, "learning_rate": 2.2516718424824845e-06, "loss": 0.8363, "step": 278460 }, { "epoch": 4.888955213399112, "grad_norm": 0.05384470711025122, "learning_rate": 2.2508783861280647e-06, "loss": 0.8361, "step": 278470 }, { "epoch": 4.889130778279113, "grad_norm": 0.048855296033197514, "learning_rate": 2.250086180943941e-06, "loss": 0.8429, "step": 278480 }, { "epoch": 4.889306343159115, "grad_norm": 0.05327099847422694, "learning_rate": 2.249295226940149e-06, "loss": 0.8372, "step": 278490 }, { "epoch": 4.889481908039116, "grad_norm": 0.04683294212189078, "learning_rate": 2.2485055241267228e-06, "loss": 0.8334, "step": 278500 }, { "epoch": 4.889657472919117, "grad_norm": 0.0618024755718891, "learning_rate": 2.2477170725136435e-06, "loss": 0.8405, "step": 278510 }, { "epoch": 4.889833037799119, "grad_norm": 0.06839605901016345, "learning_rate": 2.2469298721109236e-06, "loss": 0.8412, "step": 278520 }, { "epoch": 4.89000860267912, "grad_norm": 0.07712691406268805, "learning_rate": 2.2461439229285208e-06, "loss": 0.8393, "step": 278530 }, { "epoch": 4.890184167559122, "grad_norm": 0.06665950976515594, "learning_rate": 2.245359224976404e-06, "loss": 0.8299, "step": 278540 }, { "epoch": 4.890359732439123, "grad_norm": 0.07399322024581181, "learning_rate": 2.244575778264521e-06, "loss": 0.8347, "step": 278550 }, { "epoch": 4.890535297319124, "grad_norm": 0.03979243302458181, "learning_rate": 2.243793582802775e-06, "loss": 0.8326, "step": 278560 }, { "epoch": 4.890710862199126, "grad_norm": 0.052175661813315004, "learning_rate": 2.2430126386011006e-06, "loss": 0.833, "step": 278570 }, { "epoch": 4.890886427079127, "grad_norm": 0.05261326169584823, "learning_rate": 2.2422329456693804e-06, "loss": 0.842, "step": 278580 }, { "epoch": 4.891061991959129, "grad_norm": 0.04878331209543456, "learning_rate": 2.241454504017495e-06, "loss": 0.8314, "step": 278590 }, { "epoch": 4.89123755683913, "grad_norm": 0.04689565832430067, "learning_rate": 2.240677313655303e-06, "loss": 0.8478, "step": 278600 }, { "epoch": 4.891413121719132, "grad_norm": 0.05221460430315399, "learning_rate": 2.2399013745926643e-06, "loss": 0.8341, "step": 278610 }, { "epoch": 4.891588686599133, "grad_norm": 0.05145104945388073, "learning_rate": 2.239126686839394e-06, "loss": 0.8343, "step": 278620 }, { "epoch": 4.891764251479134, "grad_norm": 0.051053261118615474, "learning_rate": 2.238353250405328e-06, "loss": 0.8352, "step": 278630 }, { "epoch": 4.891939816359136, "grad_norm": 0.04880858847466602, "learning_rate": 2.2375810653002395e-06, "loss": 0.8385, "step": 278640 }, { "epoch": 4.892115381239137, "grad_norm": 0.05543594798725162, "learning_rate": 2.236810131533942e-06, "loss": 0.846, "step": 278650 }, { "epoch": 4.892290946119139, "grad_norm": 0.05192552768549944, "learning_rate": 2.2360404491161743e-06, "loss": 0.833, "step": 278660 }, { "epoch": 4.89246651099914, "grad_norm": 0.057205350420153366, "learning_rate": 2.235272018056708e-06, "loss": 0.8299, "step": 278670 }, { "epoch": 4.892642075879142, "grad_norm": 0.04963350882636005, "learning_rate": 2.2345048383652808e-06, "loss": 0.8445, "step": 278680 }, { "epoch": 4.892817640759143, "grad_norm": 0.05326250889020277, "learning_rate": 2.2337389100515977e-06, "loss": 0.839, "step": 278690 }, { "epoch": 4.892993205639144, "grad_norm": 0.05072235350829191, "learning_rate": 2.2329742331253643e-06, "loss": 0.8342, "step": 278700 }, { "epoch": 4.893168770519146, "grad_norm": 0.06487674690735258, "learning_rate": 2.232210807596286e-06, "loss": 0.8417, "step": 278710 }, { "epoch": 4.893344335399147, "grad_norm": 0.05521036590664638, "learning_rate": 2.231448633474024e-06, "loss": 0.8334, "step": 278720 }, { "epoch": 4.8935199002791485, "grad_norm": 0.055803420315471926, "learning_rate": 2.230687710768239e-06, "loss": 0.8356, "step": 278730 }, { "epoch": 4.8936954651591495, "grad_norm": 0.056248982327419414, "learning_rate": 2.2299280394885603e-06, "loss": 0.8479, "step": 278740 }, { "epoch": 4.893871030039151, "grad_norm": 0.059976376334121805, "learning_rate": 2.229169619644637e-06, "loss": 0.8387, "step": 278750 }, { "epoch": 4.8940465949191525, "grad_norm": 0.06209168189454056, "learning_rate": 2.2284124512460547e-06, "loss": 0.8382, "step": 278760 }, { "epoch": 4.8942221597991535, "grad_norm": 0.06878826917141462, "learning_rate": 2.2276565343024187e-06, "loss": 0.8415, "step": 278770 }, { "epoch": 4.8943977246791555, "grad_norm": 0.04649203747074873, "learning_rate": 2.2269018688232913e-06, "loss": 0.8378, "step": 278780 }, { "epoch": 4.8945732895591565, "grad_norm": 0.055556209128093594, "learning_rate": 2.226148454818258e-06, "loss": 0.8379, "step": 278790 }, { "epoch": 4.8947488544391575, "grad_norm": 0.05496131926505662, "learning_rate": 2.225396292296858e-06, "loss": 0.8367, "step": 278800 }, { "epoch": 4.8949244193191594, "grad_norm": 0.05801774918334893, "learning_rate": 2.2246453812686103e-06, "loss": 0.8405, "step": 278810 }, { "epoch": 4.8950999841991605, "grad_norm": 0.0472910479809456, "learning_rate": 2.223895721743044e-06, "loss": 0.8295, "step": 278820 }, { "epoch": 4.895275549079162, "grad_norm": 0.05122560454556435, "learning_rate": 2.2231473137296447e-06, "loss": 0.8389, "step": 278830 }, { "epoch": 4.895451113959163, "grad_norm": 0.056013324072003, "learning_rate": 2.2224001572378974e-06, "loss": 0.8304, "step": 278840 }, { "epoch": 4.895626678839165, "grad_norm": 0.05012957623410529, "learning_rate": 2.2216542522772664e-06, "loss": 0.8434, "step": 278850 }, { "epoch": 4.895802243719166, "grad_norm": 0.06925939268954806, "learning_rate": 2.220909598857214e-06, "loss": 0.8391, "step": 278860 }, { "epoch": 4.895977808599168, "grad_norm": 0.055425291918549446, "learning_rate": 2.2201661969871607e-06, "loss": 0.8315, "step": 278870 }, { "epoch": 4.896153373479169, "grad_norm": 0.046764257468795256, "learning_rate": 2.2194240466765473e-06, "loss": 0.8364, "step": 278880 }, { "epoch": 4.89632893835917, "grad_norm": 0.061332902399378136, "learning_rate": 2.2186831479347496e-06, "loss": 0.8358, "step": 278890 }, { "epoch": 4.896504503239172, "grad_norm": 0.04812721115076412, "learning_rate": 2.217943500771176e-06, "loss": 0.8343, "step": 278900 }, { "epoch": 4.896680068119173, "grad_norm": 0.049910574815562644, "learning_rate": 2.2172051051951796e-06, "loss": 0.8371, "step": 278910 }, { "epoch": 4.896855632999175, "grad_norm": 0.05550576573079736, "learning_rate": 2.2164679612161366e-06, "loss": 0.8402, "step": 278920 }, { "epoch": 4.897031197879176, "grad_norm": 0.04787282432936886, "learning_rate": 2.2157320688433667e-06, "loss": 0.8457, "step": 278930 }, { "epoch": 4.897206762759177, "grad_norm": 0.05563499437827437, "learning_rate": 2.2149974280862135e-06, "loss": 0.8418, "step": 278940 }, { "epoch": 4.897382327639179, "grad_norm": 0.050404840686993475, "learning_rate": 2.214264038953964e-06, "loss": 0.8409, "step": 278950 }, { "epoch": 4.89755789251918, "grad_norm": 0.048198400712412263, "learning_rate": 2.2135319014559274e-06, "loss": 0.8399, "step": 278960 }, { "epoch": 4.897733457399182, "grad_norm": 0.05052357082306422, "learning_rate": 2.21280101560137e-06, "loss": 0.8343, "step": 278970 }, { "epoch": 4.897909022279183, "grad_norm": 0.057731122713568624, "learning_rate": 2.2120713813995677e-06, "loss": 0.8375, "step": 278980 }, { "epoch": 4.898084587159184, "grad_norm": 0.05081446340645816, "learning_rate": 2.211342998859743e-06, "loss": 0.842, "step": 278990 }, { "epoch": 4.898260152039186, "grad_norm": 0.05587891997816039, "learning_rate": 2.210615867991139e-06, "loss": 0.8335, "step": 279000 }, { "epoch": 4.898435716919187, "grad_norm": 0.06035027903754979, "learning_rate": 2.2098899888029655e-06, "loss": 0.8417, "step": 279010 }, { "epoch": 4.898611281799189, "grad_norm": 0.046754470594056664, "learning_rate": 2.2091653613044126e-06, "loss": 0.833, "step": 279020 }, { "epoch": 4.89878684667919, "grad_norm": 0.04032333011065203, "learning_rate": 2.208441985504668e-06, "loss": 0.8371, "step": 279030 }, { "epoch": 4.898962411559191, "grad_norm": 0.05167981130926976, "learning_rate": 2.207719861412899e-06, "loss": 0.8376, "step": 279040 }, { "epoch": 4.899137976439193, "grad_norm": 0.06430372013647201, "learning_rate": 2.2069989890382607e-06, "loss": 0.8385, "step": 279050 }, { "epoch": 4.899313541319194, "grad_norm": 0.05034317800361945, "learning_rate": 2.206279368389876e-06, "loss": 0.8318, "step": 279060 }, { "epoch": 4.899489106199196, "grad_norm": 0.04825736862765043, "learning_rate": 2.205560999476857e-06, "loss": 0.8343, "step": 279070 }, { "epoch": 4.899664671079197, "grad_norm": 0.07276220921868239, "learning_rate": 2.2048438823083257e-06, "loss": 0.8405, "step": 279080 }, { "epoch": 4.899840235959199, "grad_norm": 0.053154753193462245, "learning_rate": 2.20412801689335e-06, "loss": 0.8333, "step": 279090 }, { "epoch": 4.9000158008392, "grad_norm": 0.04331622805782756, "learning_rate": 2.2034134032409986e-06, "loss": 0.835, "step": 279100 }, { "epoch": 4.900191365719202, "grad_norm": 0.0444677905484624, "learning_rate": 2.202700041360349e-06, "loss": 0.8309, "step": 279110 }, { "epoch": 4.900366930599203, "grad_norm": 0.05015715321949416, "learning_rate": 2.2019879312604146e-06, "loss": 0.8347, "step": 279120 }, { "epoch": 4.900542495479204, "grad_norm": 0.043173930789633067, "learning_rate": 2.20127707295023e-06, "loss": 0.8319, "step": 279130 }, { "epoch": 4.900718060359206, "grad_norm": 0.06468821655756614, "learning_rate": 2.200567466438797e-06, "loss": 0.8279, "step": 279140 }, { "epoch": 4.900893625239207, "grad_norm": 0.04231210687396608, "learning_rate": 2.1998591117351178e-06, "loss": 0.8406, "step": 279150 }, { "epoch": 4.901069190119209, "grad_norm": 0.07191216745453653, "learning_rate": 2.1991520088481494e-06, "loss": 0.8399, "step": 279160 }, { "epoch": 4.90124475499921, "grad_norm": 0.04993714945850589, "learning_rate": 2.198446157786861e-06, "loss": 0.835, "step": 279170 }, { "epoch": 4.901420319879211, "grad_norm": 0.05761185434038643, "learning_rate": 2.1977415585602004e-06, "loss": 0.84, "step": 279180 }, { "epoch": 4.901595884759213, "grad_norm": 0.041888489082501035, "learning_rate": 2.19703821117708e-06, "loss": 0.8478, "step": 279190 }, { "epoch": 4.901771449639214, "grad_norm": 0.04805956843946023, "learning_rate": 2.1963361156464253e-06, "loss": 0.837, "step": 279200 }, { "epoch": 4.901947014519216, "grad_norm": 0.08199819254167685, "learning_rate": 2.1956352719771282e-06, "loss": 0.8457, "step": 279210 }, { "epoch": 4.902122579399217, "grad_norm": 0.058827150423677414, "learning_rate": 2.1949356801780804e-06, "loss": 0.8383, "step": 279220 }, { "epoch": 4.902298144279218, "grad_norm": 0.04485202712486317, "learning_rate": 2.1942373402581198e-06, "loss": 0.8375, "step": 279230 }, { "epoch": 4.90247370915922, "grad_norm": 0.04649297824535971, "learning_rate": 2.1935402522261037e-06, "loss": 0.8376, "step": 279240 }, { "epoch": 4.902649274039221, "grad_norm": 0.05114129542404288, "learning_rate": 2.192844416090881e-06, "loss": 0.8353, "step": 279250 }, { "epoch": 4.902824838919223, "grad_norm": 0.046469863676526744, "learning_rate": 2.1921498318612557e-06, "loss": 0.8423, "step": 279260 }, { "epoch": 4.903000403799224, "grad_norm": 0.05144290962780012, "learning_rate": 2.1914564995460207e-06, "loss": 0.8348, "step": 279270 }, { "epoch": 4.903175968679226, "grad_norm": 0.047777590885201775, "learning_rate": 2.190764419153968e-06, "loss": 0.8348, "step": 279280 }, { "epoch": 4.903351533559227, "grad_norm": 0.07157630274946687, "learning_rate": 2.1900735906938705e-06, "loss": 0.8464, "step": 279290 }, { "epoch": 4.903527098439228, "grad_norm": 0.04617307671764577, "learning_rate": 2.1893840141744752e-06, "loss": 0.843, "step": 279300 }, { "epoch": 4.90370266331923, "grad_norm": 0.05980516433723175, "learning_rate": 2.1886956896045213e-06, "loss": 0.8365, "step": 279310 }, { "epoch": 4.903878228199231, "grad_norm": 0.07316911806676787, "learning_rate": 2.1880086169927357e-06, "loss": 0.8306, "step": 279320 }, { "epoch": 4.904053793079233, "grad_norm": 0.04729600073509166, "learning_rate": 2.1873227963478124e-06, "loss": 0.8367, "step": 279330 }, { "epoch": 4.904229357959234, "grad_norm": 0.043056974917395026, "learning_rate": 2.1866382276784453e-06, "loss": 0.8365, "step": 279340 }, { "epoch": 4.904404922839236, "grad_norm": 0.05043370629262698, "learning_rate": 2.185954910993317e-06, "loss": 0.8404, "step": 279350 }, { "epoch": 4.904580487719237, "grad_norm": 0.052100155729152824, "learning_rate": 2.1852728463010783e-06, "loss": 0.8307, "step": 279360 }, { "epoch": 4.904756052599238, "grad_norm": 0.05175030584511767, "learning_rate": 2.184592033610357e-06, "loss": 0.8378, "step": 279370 }, { "epoch": 4.90493161747924, "grad_norm": 0.05335181932513785, "learning_rate": 2.1839124729298036e-06, "loss": 0.8333, "step": 279380 }, { "epoch": 4.905107182359241, "grad_norm": 0.08539987228708797, "learning_rate": 2.1832341642680126e-06, "loss": 0.8329, "step": 279390 }, { "epoch": 4.905282747239243, "grad_norm": 0.05093353269388306, "learning_rate": 2.1825571076335904e-06, "loss": 0.8331, "step": 279400 }, { "epoch": 4.905458312119244, "grad_norm": 0.04104946421152567, "learning_rate": 2.1818813030350988e-06, "loss": 0.8422, "step": 279410 }, { "epoch": 4.905633876999245, "grad_norm": 0.07324502464453338, "learning_rate": 2.181206750481112e-06, "loss": 0.8352, "step": 279420 }, { "epoch": 4.905809441879247, "grad_norm": 0.056873450190901134, "learning_rate": 2.18053344998018e-06, "loss": 0.8361, "step": 279430 }, { "epoch": 4.905985006759248, "grad_norm": 0.04838378960420139, "learning_rate": 2.1798614015408214e-06, "loss": 0.8349, "step": 279440 }, { "epoch": 4.90616057163925, "grad_norm": 0.04010266085159644, "learning_rate": 2.1791906051715546e-06, "loss": 0.8385, "step": 279450 }, { "epoch": 4.906336136519251, "grad_norm": 0.06160821926695372, "learning_rate": 2.1785210608808865e-06, "loss": 0.8392, "step": 279460 }, { "epoch": 4.906511701399252, "grad_norm": 0.04326563547929671, "learning_rate": 2.1778527686773025e-06, "loss": 0.8348, "step": 279470 }, { "epoch": 4.906687266279254, "grad_norm": 0.0644465843623066, "learning_rate": 2.1771857285692544e-06, "loss": 0.8468, "step": 279480 }, { "epoch": 4.906862831159255, "grad_norm": 0.05352062705063244, "learning_rate": 2.176519940565206e-06, "loss": 0.8334, "step": 279490 }, { "epoch": 4.9070383960392565, "grad_norm": 0.047710068235627824, "learning_rate": 2.175855404673587e-06, "loss": 0.839, "step": 279500 }, { "epoch": 4.9072139609192575, "grad_norm": 0.06324675606614232, "learning_rate": 2.175192120902818e-06, "loss": 0.8343, "step": 279510 }, { "epoch": 4.9073895257992595, "grad_norm": 0.04956125167354765, "learning_rate": 2.1745300892613057e-06, "loss": 0.8352, "step": 279520 }, { "epoch": 4.9075650906792605, "grad_norm": 0.037112469927390895, "learning_rate": 2.1738693097574368e-06, "loss": 0.8366, "step": 279530 }, { "epoch": 4.907740655559262, "grad_norm": 0.050669385771234345, "learning_rate": 2.173209782399587e-06, "loss": 0.8409, "step": 279540 }, { "epoch": 4.9079162204392635, "grad_norm": 0.04574227321383287, "learning_rate": 2.172551507196109e-06, "loss": 0.8363, "step": 279550 }, { "epoch": 4.9080917853192645, "grad_norm": 0.04551084246897065, "learning_rate": 2.171894484155335e-06, "loss": 0.8434, "step": 279560 }, { "epoch": 4.908267350199266, "grad_norm": 0.04361574789713374, "learning_rate": 2.171238713285606e-06, "loss": 0.8369, "step": 279570 }, { "epoch": 4.908442915079267, "grad_norm": 0.04724029641923724, "learning_rate": 2.1705841945952224e-06, "loss": 0.8461, "step": 279580 }, { "epoch": 4.908618479959269, "grad_norm": 0.052440321673310926, "learning_rate": 2.169930928092481e-06, "loss": 0.8391, "step": 279590 }, { "epoch": 4.90879404483927, "grad_norm": 0.04581415199248438, "learning_rate": 2.169278913785647e-06, "loss": 0.8354, "step": 279600 }, { "epoch": 4.908969609719271, "grad_norm": 0.05226010322180794, "learning_rate": 2.1686281516829983e-06, "loss": 0.8463, "step": 279610 }, { "epoch": 4.909145174599273, "grad_norm": 0.044276340760401156, "learning_rate": 2.1679786417927654e-06, "loss": 0.8398, "step": 279620 }, { "epoch": 4.909320739479274, "grad_norm": 0.05288930929219624, "learning_rate": 2.167330384123193e-06, "loss": 0.8384, "step": 279630 }, { "epoch": 4.909496304359276, "grad_norm": 0.05755266905539326, "learning_rate": 2.1666833786824803e-06, "loss": 0.8417, "step": 279640 }, { "epoch": 4.909671869239277, "grad_norm": 0.06568486436433703, "learning_rate": 2.1660376254788266e-06, "loss": 0.8322, "step": 279650 }, { "epoch": 4.909847434119278, "grad_norm": 0.05035782646462016, "learning_rate": 2.1653931245204205e-06, "loss": 0.8402, "step": 279660 }, { "epoch": 4.91002299899928, "grad_norm": 0.06210543654294787, "learning_rate": 2.1647498758154284e-06, "loss": 0.8357, "step": 279670 }, { "epoch": 4.910198563879281, "grad_norm": 0.04704650552818354, "learning_rate": 2.1641078793719945e-06, "loss": 0.841, "step": 279680 }, { "epoch": 4.910374128759283, "grad_norm": 0.04916405314266536, "learning_rate": 2.1634671351982527e-06, "loss": 0.8412, "step": 279690 }, { "epoch": 4.910549693639284, "grad_norm": 0.0527733013129654, "learning_rate": 2.1628276433023254e-06, "loss": 0.8419, "step": 279700 }, { "epoch": 4.910725258519285, "grad_norm": 0.049605644519071325, "learning_rate": 2.1621894036923127e-06, "loss": 0.8402, "step": 279710 }, { "epoch": 4.910900823399287, "grad_norm": 0.050821098745100934, "learning_rate": 2.161552416376305e-06, "loss": 0.8354, "step": 279720 }, { "epoch": 4.911076388279288, "grad_norm": 0.06046703143729991, "learning_rate": 2.1609166813623694e-06, "loss": 0.8378, "step": 279730 }, { "epoch": 4.91125195315929, "grad_norm": 0.05044453773583008, "learning_rate": 2.1602821986585617e-06, "loss": 0.839, "step": 279740 }, { "epoch": 4.911427518039291, "grad_norm": 0.05514556390601098, "learning_rate": 2.1596489682729286e-06, "loss": 0.8358, "step": 279750 }, { "epoch": 4.911603082919293, "grad_norm": 0.057981331300718766, "learning_rate": 2.1590169902134824e-06, "loss": 0.8416, "step": 279760 }, { "epoch": 4.911778647799294, "grad_norm": 0.05426131070894731, "learning_rate": 2.1583862644882244e-06, "loss": 0.8426, "step": 279770 }, { "epoch": 4.911954212679296, "grad_norm": 0.05641558529129608, "learning_rate": 2.157756791105167e-06, "loss": 0.8375, "step": 279780 }, { "epoch": 4.912129777559297, "grad_norm": 0.05642186222656356, "learning_rate": 2.157128570072269e-06, "loss": 0.8468, "step": 279790 }, { "epoch": 4.912305342439298, "grad_norm": 0.03954130662683614, "learning_rate": 2.1565016013974975e-06, "loss": 0.8389, "step": 279800 }, { "epoch": 4.9124809073193, "grad_norm": 0.059825246012372255, "learning_rate": 2.155875885088789e-06, "loss": 0.8367, "step": 279810 }, { "epoch": 4.912656472199301, "grad_norm": 0.06253953049105121, "learning_rate": 2.1552514211540793e-06, "loss": 0.829, "step": 279820 }, { "epoch": 4.912832037079303, "grad_norm": 0.0551245140919602, "learning_rate": 2.1546282096012704e-06, "loss": 0.8307, "step": 279830 }, { "epoch": 4.913007601959304, "grad_norm": 0.0534558208617877, "learning_rate": 2.1540062504382765e-06, "loss": 0.8365, "step": 279840 }, { "epoch": 4.913183166839305, "grad_norm": 0.04717544386298845, "learning_rate": 2.153385543672967e-06, "loss": 0.8435, "step": 279850 }, { "epoch": 4.913358731719307, "grad_norm": 0.04772322205221167, "learning_rate": 2.152766089313201e-06, "loss": 0.8333, "step": 279860 }, { "epoch": 4.913534296599308, "grad_norm": 0.05597869377708241, "learning_rate": 2.1521478873668367e-06, "loss": 0.8381, "step": 279870 }, { "epoch": 4.91370986147931, "grad_norm": 0.05213046536879911, "learning_rate": 2.1515309378416894e-06, "loss": 0.8317, "step": 279880 }, { "epoch": 4.913885426359311, "grad_norm": 0.05700192271910947, "learning_rate": 2.1509152407456065e-06, "loss": 0.8355, "step": 279890 }, { "epoch": 4.914060991239312, "grad_norm": 0.04657504380023435, "learning_rate": 2.15030079608637e-06, "loss": 0.8401, "step": 279900 }, { "epoch": 4.914236556119314, "grad_norm": 0.05659475025374277, "learning_rate": 2.1496876038717616e-06, "loss": 0.8376, "step": 279910 }, { "epoch": 4.914412120999315, "grad_norm": 0.11344765372029422, "learning_rate": 2.1490756641095516e-06, "loss": 0.8453, "step": 279920 }, { "epoch": 4.914587685879317, "grad_norm": 0.055910393210492064, "learning_rate": 2.1484649768075e-06, "loss": 0.8407, "step": 279930 }, { "epoch": 4.914763250759318, "grad_norm": 0.05581482747389268, "learning_rate": 2.1478555419733452e-06, "loss": 0.8459, "step": 279940 }, { "epoch": 4.91493881563932, "grad_norm": 0.047898834586171536, "learning_rate": 2.1472473596148137e-06, "loss": 0.836, "step": 279950 }, { "epoch": 4.915114380519321, "grad_norm": 0.051149828668926034, "learning_rate": 2.146640429739588e-06, "loss": 0.8442, "step": 279960 }, { "epoch": 4.915289945399322, "grad_norm": 0.05359341235530319, "learning_rate": 2.1460347523553847e-06, "loss": 0.8296, "step": 279970 }, { "epoch": 4.915465510279324, "grad_norm": 0.05467313358930575, "learning_rate": 2.1454303274698634e-06, "loss": 0.8395, "step": 279980 }, { "epoch": 4.915641075159325, "grad_norm": 0.04736530029630476, "learning_rate": 2.1448271550906754e-06, "loss": 0.8312, "step": 279990 }, { "epoch": 4.915816640039327, "grad_norm": 0.04552368774162066, "learning_rate": 2.1442252352254805e-06, "loss": 0.8492, "step": 280000 } ], "logging_steps": 10, "max_steps": 284795, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.422163750513541e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }